这是DDL -
-- Sample table for the string-splitting examples: each row keeps a
-- comma-separated list of tokens in a single column.
create table tbl1 (
    id    number,
    value varchar2(50)  -- comma-separated tokens, e.g. 'AA, UT, SX'
);

-- Target columns are named explicitly so the inserts do not depend on the
-- physical column order of the table.
insert into tbl1 (id, value) values (1, 'AA, UT, BT, SK, SX');
insert into tbl1 (id, value) values (2, 'AA, UT, SX');
insert into tbl1 (id, value) values (3, 'UT, SK, SX, ZF');
Run Code Online (Sandbox Code Playgroud)
注意,这里的值是逗号分隔的字符串.
但是,我们需要结果如下 -
ID VALUE
-------------
1 AA
1 UT
1 BT
1 SK
1 SX
2 AA
2 UT
2 SX
3 UT
3 SK
3 SX
3 ZF
Run Code Online (Sandbox Code Playgroud)
我们如何为此编写SQL?
ver*_*lli 14
我同意这是一个非常糟糕的设计.如果您无法更改该设计,请尝试此操作:
-- Split each comma-separated VALUE into one row per token.
-- LEVEL is used as the token index passed to REGEXP_SUBSTR.
-- The CONNECT BY has no PRIOR condition tying a child to its own source row,
-- so with several rows in tbl1 the hierarchy yields many duplicate
-- (id, value, level) rows; DISTINCT removes them again.
SELECT DISTINCT
       id,
       TRIM(REGEXP_SUBSTR(value, '[^,]+', 1, LEVEL)) AS value,
       LEVEL
FROM   tbl1
CONNECT BY REGEXP_SUBSTR(value, '[^,]+', 1, LEVEL) IS NOT NULL
ORDER BY id,
         LEVEL;
Run Code Online (Sandbox Code Playgroud)
OUTPUT
id value level
1 AA 1
1 UT 2
1 BT 3
1 SK 4
1 SX 5
2 AA 1
2 UT 2
2 SX 3
3 UT 1
3 SK 2
3 SX 3
3 ZF 4
Run Code Online (Sandbox Code Playgroud)
归功于此
以更优雅和有效的方式删除重复项(致@mathguy)
-- Split each comma-separated VALUE into one row per token without DISTINCT.
-- PRIOR id = id keeps every hierarchy inside a single source row, so no
-- cross-row duplicates are generated.
-- PRIOR SYS_GUID() IS NOT NULL is the extra check that stops Oracle from
-- reporting a loop in the hierarchy for that self-referencing condition.
SELECT id,
       TRIM(REGEXP_SUBSTR(value, '[^,]+', 1, LEVEL)) AS value,
       LEVEL
FROM   tbl1
CONNECT BY PRIOR id = id
       AND PRIOR SYS_GUID() IS NOT NULL
       AND REGEXP_SUBSTR(value, '[^,]+', 1, LEVEL) IS NOT NULL
ORDER BY id,
         LEVEL;
Run Code Online (Sandbox Code Playgroud)
如果您想要"ANSIer"方法,请使用CTE:
-- Recursive subquery factoring: the anchor member extracts the first token of
-- each row, the recursive member extracts token lev + 1 until none is left.
WITH tokens (id, res, src, lev) AS (
    -- Anchor: first token of every source row.
    SELECT id,
           TRIM(REGEXP_SUBSTR(value, '[^,]+', 1, 1)) AS res,
           value AS src,
           1 AS lev
    FROM   tbl1
    WHERE  REGEXP_SUBSTR(value, '[^,]+', 1, 1) IS NOT NULL
    UNION ALL
    -- Recursive step: next token of the same source string.
    SELECT id,
           TRIM(REGEXP_SUBSTR(src, '[^,]+', 1, lev + 1)) AS res,
           src,
           lev + 1 AS lev
    FROM   tokens
    WHERE  REGEXP_SUBSTR(src, '[^,]+', 1, lev + 1) IS NOT NULL
)
SELECT id,
       res,
       lev
FROM   tokens
ORDER BY id,
         lev;
Run Code Online (Sandbox Code Playgroud)
OUTPUT
id res lev
1 AA 1
1 UT 2
1 BT 3
1 SK 4
1 SX 5
2 AA 1
2 UT 2
2 SX 3
3 UT 1
3 SK 2
3 SX 3
3 ZF 4
Run Code Online (Sandbox Code Playgroud)
MT0的另一种递归方法,但没有正则表达式:
-- Split without regular expressions: walk each string comma by comma with
-- INSTR and cut the tokens out with SUBSTR.
--   start_pos : position of the first character of the current token
--   end_pos   : position of the comma that ends the current token,
--               or 0 when the current token is the last one
WITH t ( id, value, start_pos, end_pos ) AS
  ( -- Anchor: the first token starts at position 1.
    SELECT id, value, 1, INSTR( value, ',' ) FROM tbl1
  UNION ALL
    -- Recursive step: the next token starts right after the previous comma.
    SELECT id,
           value,
           end_pos + 1,
           INSTR( value, ',', end_pos + 1 )
    FROM   t
    WHERE  end_pos > 0   -- stop once the last token has been reached
  )
SELECT id,
       -- TRIM removes the blank that follows each comma in the sample data
       -- ('AA, UT, ...'), matching the other split queries.
       TRIM(
         SUBSTR(
           value,
           start_pos,
           -- For the last token (end_pos = 0) read up to the end of the string.
           CASE end_pos
             WHEN 0 THEN LENGTH( value ) + 1
             ELSE end_pos
           END - start_pos
         )
       ) AS value
FROM   t
ORDER BY id,
         start_pos;
Run Code Online (Sandbox Code Playgroud)
我尝试了3种方法,其中包含30000行数据集,返回了118104行,得到了以下平均结果:
@Mathguy还测试了一个更大的数据集:
在所有情况下,递归查询(我只测试了使用常规 substr 和 instr 的那个)表现更好,快 2 到 5 倍。下面列出了字符串数量/每个字符串的标记数的各种组合,以及分层查询与递归查询的 CTAS 执行时间(分层查询在前)。所有时间均以秒为单位。
小智 5
韦尔切利发布了正确答案。然而,如果要分割的字符串不止一个,connect by 将生成指数级增长的行数,其中包含大量重复项。(只需尝试去掉 distinct 的查询即可看到。)对于非常大的数据集,这会严重影响性能。
解决此问题的一种常见方法是使用prior条件和附加检查来避免层次结构中的循环。就像这样:
-- One output row per token of each comma-separated VALUE.
-- "prior id = id" restricts each hierarchy to its own source row, which
-- avoids the duplicate rows a bare CONNECT BY would produce.
-- "prior sys_guid() is not null" is the additional check that prevents a
-- loop from being detected in the hierarchy.
select
    id,
    trim(regexp_substr(value, '[^,]+', 1, level)) as value,
    level
from
    tbl1
connect by
        prior id = id
    and prior sys_guid() is not null
    and regexp_substr(value, '[^,]+', 1, level) is not null
order by
    id,
    level;
Run Code Online (Sandbox Code Playgroud)
例如,请参阅 OTN 上的讨论: https: //community.oracle.com/thread/2526535
这种方法可以直接获得各个值,既不需要删除重复项,也不必在 CONNECT BY 中使用 SYS_GUID() 或 DBMS_RANDOM.VALUE() 这类取巧手段:
-- For every row of tbl1, a correlated CONNECT BY over DUAL builds a
-- collection holding that row's tokens; TABLE() unnests the collection back
-- into rows. The row generator runs per source row, so no duplicates arise.
SELECT src.id,
       tok.COLUMN_VALUE AS value
FROM   tbl1 src
       CROSS JOIN TABLE(
           CAST(
               MULTISET(
                   -- One row per token: LEVEL runs from 1 to the token count.
                   SELECT TRIM(REGEXP_SUBSTR(src.value, '[^,]+', 1, LEVEL))
                   FROM   DUAL
                   CONNECT BY LEVEL <= REGEXP_COUNT(src.value, '[^,]+')
               ) AS SYS.ODCIVARCHAR2LIST
           )
       ) tok
Run Code Online (Sandbox Code Playgroud)
更新:
返回列表中元素的索引:
选项 1 - 返回 UDT:
-- Object type pairing an integer index (lvl) with a string value.
CREATE TYPE string_pair AS OBJECT (
    lvl   INT,
    value VARCHAR2(4000)
);
/
-- Nested-table type of string_pair, usable as a CAST target for TABLE().
CREATE TYPE string_pair_table AS TABLE OF string_pair;
/
-- Same per-row split as the ODCIVARCHAR2LIST variant, but each collection
-- element is a string_pair, so the token index (lvl) is returned together
-- with the token text (value).
SELECT src.id,
       tok.lvl,
       tok.value
FROM   tbl1 src
       CROSS JOIN TABLE(
           CAST(
               MULTISET(
                   -- LEVEL doubles as the token index stored in lvl.
                   SELECT string_pair(
                              LEVEL,
                              TRIM(REGEXP_SUBSTR(src.value, '[^,]+', 1, LEVEL))
                          )
                   FROM   DUAL
                   CONNECT BY LEVEL <= REGEXP_COUNT(src.value, '[^,]+')
               ) AS string_pair_table
           )
       ) tok;
Run Code Online (Sandbox Code Playgroud)
选项 2 - 使用ROW_NUMBER():
-- Per-row split through a built-in collection type; the token index is
-- derived afterwards with ROW_NUMBER().
-- NOTE(review): the numbering orders by ROWNUM, i.e. it assumes rows come out
-- of TABLE() in the order the tokens were generated - confirm before relying
-- on lvl as the true list position.
SELECT src.id,
       tok.COLUMN_VALUE AS value,
       ROW_NUMBER() OVER (PARTITION BY src.id ORDER BY ROWNUM) AS lvl
FROM   tbl1 src
       CROSS JOIN TABLE(
           CAST(
               MULTISET(
                   SELECT TRIM(REGEXP_SUBSTR(src.value, '[^,]+', 1, LEVEL))
                   FROM   DUAL
                   CONNECT BY LEVEL <= REGEXP_COUNT(src.value, '[^,]+')
               ) AS SYS.ODCIVARCHAR2LIST
           )
       ) tok;
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
40194 次 |
| 最近记录: |