Ste*_*ith 8 postgresql aggregate postgresql-9.5
是否可以在 Postgres 中创建某种分组链?假设我有以下这张表:
-- Sample data for the "grouping chain" question.
-- The ids are written out explicitly instead of being produced by
-- row_number() OVER (): a window with no ORDER BY gives no guarantee about
-- which row receives which number, and the rest of the discussion relies on
-- these exact id values. The bigint cast keeps the column type that
-- row_number() would have produced.
-- Column d is NULL for ids 4 and 5; those NULLs must not match each other.
CREATE TABLE foo AS
SELECT
    f.id,
    f.a,
    f.b,
    f.c,
    f.d
FROM ( VALUES
    ( 1::bigint, 'X', 'D', 'G', 'P' ),
    ( 2,         'F', 'D', 'L', 'M' ),
    ( 3,         'X', 'N', 'R', 'S' ),
    ( 4,         'Y', 'I', 'W', NULL ),
    ( 5,         'U', 'Z', 'E', NULL )
) AS f(id, a, b, c, d);
id | a | b | c | d
------------------
1 | X | D | G | P
2 | F | D | L | M
3 | X | N | R | S
4 | Y | I | W |
5 | U | Z | E |
Run Code Online (Sandbox Code Playgroud)
我想以某种方式写一个 GROUP BY,产生三个组:
- 1、2 和 3 在一起
  - 1 和 2,因为 b 列中有共同的 D
  - 1 和 3,因为 a 列中有共同的 X
- 4 单独一组(任何列中都没有公共值;空值不应该相互匹配)
- 5 单独一组(任何列中都没有公共值;空值不应该相互匹配)

我目前使用的是 Postgres 9.5,但我们最终会升级到 9.6,所以如果 9.6 中有什么可以帮助我的功能,我也愿意了解。
换句话说,我正在寻找类似下面的结果(假设我使用 array_agg(DISTINCT a) 等来让显示更简单):
ids | as | bs | cs | ds
-----------------------------------------------------------------------
{1, 2, 3} | {'X', 'F'} | {'D', 'N'} | {'G', 'L', 'R'} | {'P', 'M', 'S'}
{4} | {'Y'} | {'I'} | {'W'} | {NULL}
{5} | {'U'} | {'Z'} | {'E'} | {NULL}
Run Code Online (Sandbox Code Playgroud)
(我不确定空值将如何显示,所以不要太在意;重要的一点是它们不应该相互匹配。)
当我使用 GROUP BY CUBE (a, b, c, d) 时,得到的结果远不止三个……GROUP BY ROLLUP 和 GROUP BY GROUPING SETS 也是如此。
Postgres 有优雅的方法吗?我可以想象您将如何通过 Active Record 在 Ruby 中执行此操作(循环遍历每条记录,将其与之前匹配的分组集分组),但如果可能,我想将其保留在 Postgres 中。
另一个递归解决方案:
初始数据(从Jack Douglas 的解决方案中复制):
-- Test fixture: inside an open transaction, create a scratch schema and load
-- the five sample rows into foo. Nothing is committed here.
begin;

create schema stack;

-- Make unqualified names such as foo resolve to the scratch schema.
set search_path = stack;

create table foo as
select
    f.id,
    f.a,
    f.b,
    f.c,
    f.d
from (
    values
        (1, 'X', 'D', 'G', 'P'),
        (2, 'F', 'D', 'L', 'M'),
        (3, 'X', 'N', 'R', 'S'),
        (4, 'Y', 'I', 'W', null),
        (5, 'U', 'Z', 'E', null)
) as f (id, a, b, c, d);
Run Code Online (Sandbox Code Playgroud)
查询:
-- Groups the rows of foo into connected components: two rows are linked when
-- they share a non-NULL value in the same column (a, b, c or d), and links
-- are followed transitively. One output row per component, with the member
-- ids and the distinct values seen in each column.
with recursive
-- Adjacency list: every ordered pair of rows that are directly linked.
-- "=" never matches NULL, so NULLs do not link rows. The explicit
-- f.id = g.id term guarantees each row is linked to itself even when all of
-- a..d are NULL; without it such a row would vanish from the result.
al (tail, head) as (
    select
        f.id,
        g.id
    from foo as f
    inner join foo as g
        on f.id = g.id
        or f.a = g.a
        or f.b = g.b
        or f.c = g.c
        or f.d = g.d
),
-- Transitive closure of the adjacency list. UNION DISTINCT (not UNION ALL)
-- discards pairs already found, which is what lets the recursion stop.
tc (tail, head) as (
    select
        al.tail,
        al.head
    from al
    union distinct
    select
        f.tail,
        g.head
    from al as f
    inner join tc as g
        on f.head = g.tail
),
-- First grouping: for every row (head), the sorted array of all row ids
-- reachable from it. Rows in the same component get identical arrays.
cc (head, ids) as (
    select
        tc.head,
        array_agg(distinct tc.tail order by tc.tail) as ids
    from tc
    group by tc.head
)
-- Second grouping: collapse rows that share the same component array.
select
    cc.ids,
    array_agg(distinct foo.a order by foo.a) as a,
    array_agg(distinct foo.b order by foo.b) as b,
    array_agg(distinct foo.c order by foo.c) as c,
    array_agg(distinct foo.d order by foo.d) as d
from cc
inner join foo
    on cc.head = foo.id
group by cc.ids;
Run Code Online (Sandbox Code Playgroud)
-- Test fixture: inside an open transaction, create a scratch schema and load
-- the five sample rows into foo. Nothing is committed here.
begin;

create schema stack;

-- Make unqualified names such as foo resolve to the scratch schema.
set search_path = stack;

create table foo as
select
    f.id,
    f.a,
    f.b,
    f.c,
    f.d
from (
    values
        (1, 'X', 'D', 'G', 'P'),
        (2, 'F', 'D', 'L', 'M'),
        (3, 'X', 'N', 'R', 'S'),
        (4, 'Y', 'I', 'W', null),
        (5, 'U', 'Z', 'E', null)
) as f (id, a, b, c, d);
Run Code Online (Sandbox Code Playgroud)
清理:
-- Clean-up: abort the current transaction so that everything created since
-- the matching "begin" is discarded.
rollback transaction;
Run Code Online (Sandbox Code Playgroud)
假设您追求的是通用解决方案,我认为没有任何非递归方法可以解决您的问题。如果您的实际问题需要处理大量行,那么要得到一个扩展性足够好的解决方案,您可能要费一番功夫。
测试模式和数据:
-- Test fixture: inside an open transaction, create a scratch schema and load
-- the five sample rows into foo. Nothing is committed here.
begin;

create schema stack;

-- Make unqualified names such as foo resolve to the scratch schema.
set search_path = stack;

create table foo as
select
    f.id,
    f.a,
    f.b,
    f.c,
    f.d
from (
    values
        (1, 'X', 'D', 'G', 'P'),
        (2, 'F', 'D', 'L', 'M'),
        (3, 'X', 'N', 'R', 'S'),
        (4, 'Y', 'I', 'W', null),
        (5, 'U', 'Z', 'E', null)
) as f (id, a, b, c, d);
Run Code Online (Sandbox Code Playgroud)
解决方案:
-- Groups the rows of foo into connected components: two rows are linked when
-- they share a non-NULL value in the same column (a, b, c or d), and links
-- are followed transitively. One output row per component.
--
-- t walks every path through the link graph, starting once from each row:
--   start : id of the row the walk began at
--   path  : ids visited so far, in order
--   cycle : true when the row just added was already on the path; such rows
--           are neither extended further nor counted in the result
-- NOTE(review): this enumerates every simple path, so the work can grow very
-- quickly on densely linked data.
with recursive t (id, a, b, c, d, start, path, cycle) as (
    select
        foo.id,
        foo.a,
        foo.b,
        foo.c,
        foo.d,
        foo.id,
        array[foo.id],
        false
    from foo
    union all
    select
        f.id,
        f.a,
        f.b,
        f.c,
        f.d,
        t.start,
        t.path || f.id,
        f.id = any (t.path)
    from foo as f
    inner join t
        on f.id <> t.id
        -- "=" never matches NULL, so NULLs do not link rows.
        and (f.a = t.a or f.b = t.b or f.c = t.c or f.d = t.d)
    where not t.cycle
)
select
    array_agg(f.id order by f.id) as ids,
    array_agg(distinct f.a order by f.a) as a,
    array_agg(distinct f.b order by f.b) as b,
    array_agg(distinct f.c order by f.c) as c,
    array_agg(distinct f.d order by f.d) as d
from foo as f
inner join (
    -- For each start row, the sorted set of ids reachable from it.
    -- DISTINCT is required: a row reachable along several paths appears in t
    -- once per path, and without de-duplication rows of one component (for
    -- example three mutually linked rows) would get different arrays and be
    -- split into separate groups.
    select
        t.start as id,
        array_agg(distinct t.id order by t.id) as ids
    from t
    where not t.cycle
    group by t.start
) as z
    on z.id = f.id
-- Rows whose reachable sets are equal belong to the same component.
group by z.ids;
Run Code Online (Sandbox Code Playgroud)
-- Test fixture: inside an open transaction, create a scratch schema and load
-- the five sample rows into foo. Nothing is committed here.
begin;

create schema stack;

-- Make unqualified names such as foo resolve to the scratch schema.
set search_path = stack;

create table foo as
select
    f.id,
    f.a,
    f.b,
    f.c,
    f.d
from (
    values
        (1, 'X', 'D', 'G', 'P'),
        (2, 'F', 'D', 'L', 'M'),
        (3, 'X', 'N', 'R', 'S'),
        (4, 'Y', 'I', 'W', null),
        (5, 'U', 'Z', 'E', null)
) as f (id, a, b, c, d);
Run Code Online (Sandbox Code Playgroud)
清理:
-- Clean-up: abort the current transaction so that everything created since
-- the matching "begin" is discarded.
rollback transaction;
Run Code Online (Sandbox Code Playgroud)