ora*_*nge 7 postgresql performance stored-procedures common-table-expression postgresql-9.2
在Convert recursive function to view的基础上,我想通过创建节点父节点的快照来加速从图中任意节点到其根节点的路径检索.这个想法是递归树遍历受到中间快照的限制,这些快照避免了任何进一步的递归,从而加快了执行时间.我还没有进行负载测试,所以我不知道这是如何在这个简单的例子之外表现的,但是早期的试验已经表明了一些瓶颈.我很乐意就如何加快/简化查询提出意见.我正在使用Postgres 9.2.2.0(20).
DROP TABLE IF EXISTS revision CASCADE;
CREATE TABLE revision (
id serial primary key,
parent_revision_id int references revision(id),
username varchar(128),
ts timestamp without time zone
);
DROP TABLE IF EXISTS revision_snapshot CASCADE;
CREATE TABLE revision_snapshot (
id serial primary key,
revision_id int,
parent_revision_id int,
depth int
);
CREATE OR REPLACE FUNCTION create_revision_snapshot(_rev int)
RETURNS void AS
$func$
DELETE FROM revision_snapshot WHERE revision_id=$1;
INSERT INTO revision_snapshot (revision_id, parent_revision_id, depth)
(SELECT $1, id, depth FROM revision_tree($1));
$func$ LANGUAGE sql;
-- Recursively return path from '_rev' to root
CREATE OR REPLACE FUNCTION revision_tree(_rev int)
RETURNS TABLE(id int, parent_revision_id int, depth int) AS
$func$
WITH RECURSIVE rev_list(id, parent_revision_id, depth) AS (
SELECT t.id, t.parent_revision_id, 1
FROM revision t
WHERE t.id = $1
UNION ALL
SELECT t.id, t.parent_revision_id, r.depth + 1
FROM rev_list r
JOIN revision t ON t.id = r.parent_revision_id
)
SELECT t.id, t.parent_revision_id, t.depth
FROM rev_list t
ORDER BY t.id;
$func$ LANGUAGE sql;
-- Fast version of 'revision_tree' (to be). This version will return the
-- revision tree making use of snapshots (recursively returning the path from
-- specified revision id to last snapshot of the path to the root + the snapshot)
CREATE OR REPLACE FUNCTION revision_tree_perf(_rev int)
RETURNS TABLE(parent_revision_id int) AS
$func$
BEGIN
CREATE TEMP TABLE graph_result ON COMMIT DROP AS
WITH RECURSIVE rev_list(id, parent_revision_id, depth) AS (
SELECT t.id, t.parent_revision_id, 1
FROM revision t
WHERE t.id = $1
UNION ALL
SELECT t.id, t.parent_revision_id, r.depth + 1
FROM rev_list r
JOIN revision t ON t.id = r.parent_revision_id
WHERE not(t.id in (select revision_id from revision_snapshot))
)
SELECT t.id, t.parent_revision_id, t.depth
FROM rev_list t
ORDER BY t.id;
RETURN QUERY
SELECT g.parent_revision_id FROM graph_result AS g WHERE g.parent_revision_id IS NOT NULL
UNION
SELECT s.parent_revision_id FROM revision_snapshot AS s WHERE
s.revision_id = (SELECT min(q.parent_revision_id) FROM graph_result as q) ORDER BY parent_revision_id;
END;
$func$
LANGUAGE 'plpgsql';
-- Example tree
--
-- +-- <10>
-- /
-- +-- <4> -- <8> --- <9> -+- <11> --- <15> --- <16> --- <17>
-- /
-- <1> --- <2> --- <3> -+
-- \
-- +-- <5> --- <6> --- <7> --- <12> -+- <14> --- <18>
-- \
-- \
-- \
-- \
-- +-- <13> --- <19> --- <20> --- <21>
--
INSERT INTO revision (username, ts, parent_revision_id) VALUES
('someone', now(), null) -- 1
,('someone', now(), 1) -- 2
,('someone', now(), 2) -- 3
,('someone', now(), 3) -- 4
,('someone', now(), 3) -- 5
,('someone', now(), 5) -- 6
,('someone', now(), 6) -- 7
,('someone', now(), 4) -- 8
,('someone', now(), 8) -- 9
,('someone', now(), 9) -- 10
,('someone', now(), 9) -- 11
,('someone', now(), 7) -- 12
,('someone', now(), 12) -- 13
,('someone', now(), 12) -- 14
,('someone', now(), 11) -- 15
,('someone', now(), 15) -- 16
,('someone', now(), 16) -- 17
,('someone', now(), 14) -- 18
,('someone', now(), 13) -- 19
,('someone', now(), 19) -- 20
,('someone', now(), 20); -- 21
-- Create a revision snapsnot
select create_revision_snapshot(13);
-- This query is meant to be faster ...
select * from revision_tree_perf(21);
-- ... than this one
select * from revision_tree(21);
Run Code Online (Sandbox Code Playgroud)
上面的例子
select create_revision_snapshot(13);
select * from revision_tree_perf(21);
Run Code Online (Sandbox Code Playgroud)
意味着产生一个记录集,表示从21到根的路径,即(21, 20, 19, 13, 12, 7, 6, 5, 3, 2, 1).部分解决方案是通过走树(21到13,因为有13的快照,因此不需要再进一步走树)并使用从13到根的已经"缓存"路径(取自revision_snapshot).希望能让它更容易理解......
更新:
我想出了一个潜在的改进.这只是在黑暗中刺伤,但我可以想象这些exists条款相当昂贵.我现在在修订表中标记了快照的存在:
CREATE TABLE revision (
id serial primary key,
parent_revision_id int references revision(id),
username varchar(128),
has_snapshot boolean default false,
ts timestamp without time zone
);
CREATE OR REPLACE FUNCTION create_revision_snapshot(_rev int) RETURNS void AS $func$
DELETE FROM revision_snapshot WHERE revision_id=$1;
INSERT INTO revision_snapshot (revision_id, parent_revision_id, depth)
(SELECT $1, id, depth FROM revision_tree($1));
-- Mark revision table to avoid costly exists/in clause
UPDATE revision SET has_snapshot = true WHERE id=$1;
$func$ LANGUAGE sql;
Run Code Online (Sandbox Code Playgroud)
这会将revision_tree_perfSP 的CTE部分更改为
WITH RECURSIVE rev_list(id, parent_revision_id, depth) AS (
SELECT t.id, t.parent_revision_id, 1 -- AS depth
FROM revision t
WHERE t.id = $1
UNION ALL
SELECT t.id, t.parent_revision_id, r.depth + 1
FROM rev_list r
JOIN revision t ON t.id = r.parent_revision_id
WHERE t.has_snapshot = false
)
SELECT t.id, t.parent_revision_id, t.depth FROM rev_list t ORDER BY t.id;
Run Code Online (Sandbox Code Playgroud)
这应该很快执行.这个难题的另一部分是从修订版ID返回revision_snapshot的内容has_snapshot=true并加入这两个结果.问题是如何从CTE获取此修订版ID.我可以将CTE的查询结果存储在临时表中并查询修订版ID,或者建议不要编写CTE并将其写为循环.这样,可以跟踪循环将退出的修订ID(何时has_snapshot = true).但我不确定这与CTE相比如何.
人们对这种方法有何看法?
这是一个完全修改的版本。
在我的最小测试中,使用的新函数revision_snapshot现在实际上更快。
我做了很多改变。最重要的是:
不要向原始表添加列。这可能会稍微加快查询速度,但也会在主表中引入成本和开销。如果您对表所做的只是执行此函数,则可能会有所帮助,但在现实生活中,这只是众多任务之一。
从函数中删除临时表。仅使用 CTE 就可以便宜得多。
修复ORDER BY,这是错误的。
有关更多信息,请阅读代码中的注释。
也可以作为->sqlfiddle来玩。
CREATE TABLE revision (
revision_id serial PRIMARY KEY -- Don't use useless name "id", that's an anti-pattern of ORMs
,parent_revision_id int NOT NULL REFERENCES revision(revision_id) DEFERRABLE
-- must be DEFERRABLE for self-reference of root
,ts timestamp NOT NULL -- all columns NOT NULL
,has_snapshot boolean NOT NULL DEFAULT FALSE -- columns ordered for perfect packing and performance
,username text NOT NULL
);
CREATE TABLE revision_snapshot (
depth int PRIMARY KEY
,revision_id int
); -- simplified
-- Recursively return path from '_revision_id' to root
CREATE OR REPLACE FUNCTION revision_tree(_revision_id int)
RETURNS TABLE(depth int, revision_id int) AS
$func$
WITH RECURSIVE l AS (
SELECT 1::int AS depth, r.parent_revision_id AS revision_id
FROM revision r
WHERE r.revision_id = $1
UNION ALL
SELECT l.depth + 1, r.parent_revision_id -- AS revision_id
FROM l
JOIN revision r USING (revision_id)
WHERE r.parent_revision_id <> 0
)
SELECT *
FROM l
ORDER BY l.depth; -- NOT revision_id!
$func$ LANGUAGE sql;
CREATE OR REPLACE FUNCTION create_revision_snapshot(_revision_id int)
RETURNS void AS
$func$
-- for tiny tables, DELETE is faster than TRUNCATE
DELETE FROM revision_snapshot;
INSERT INTO revision_snapshot (depth, revision_id)
SELECT depth, revision_id
FROM revision_tree($1);
$func$ LANGUAGE sql;
-- Faster version of 'revision_tree'.
-- Stops recursion as soon as revision_snapshot covers the "last mile" to root
CREATE OR REPLACE FUNCTION revision_tree_perf(_revision_id int)
RETURNS TABLE(revision_id int) AS
$func$
BEGIN
RETURN QUERY -- works without expensive temp table
WITH RECURSIVE l AS (
SELECT 1::int AS depth, r.parent_revision_id AS revision_id -- trim cruft, only two columns needed
FROM revision r
WHERE r.revision_id = $1
UNION ALL
SELECT l.depth + 1, r.parent_revision_id -- AS revision_id
FROM l
JOIN revision r USING (revision_id)
WHERE r.parent_revision_id <> 0 -- stop condition needed, since parent_revision_id IS NOT NULL
AND NOT EXISTS ( -- NOT EXISTS faster than IN (SELECT...)
SELECT 1 FROM revision_snapshot s WHERE s.revision_id = l.revision_id)
)
( -- extra parens needed for separate ORDER BY in UNION ALL
SELECT l.revision_id
FROM l
ORDER BY l.depth -- NOT revision_id! Bug just didn't show because the test ids were ordered.
)
UNION ALL -- NOT: UNION - correct and faster
(
SELECT s.revision_id
FROM revision_snapshot s
WHERE s.depth > (
SELECT s0.depth
FROM revision_snapshot s0
JOIN l USING (revision_id)
) -- must be exactly 1 value - follows logically from CTE
ORDER BY s.depth
);
END -- no ; needed here
$func$ LANGUAGE plpgsql; -- DO NOT quote language name!
-- Example tree
--
-- +-- <10>
-- /
-- +- <4> -- <8> -- <9> -+- <11> -- <15> -- <16> -- <17>
-- /
-- <0> -- <1> -- <2> -- <3> -+
-- \
-- +- <5> -- <6> -- <7> -- <12> -+- <14> -- <18>
-- \
-- \
-- \
-- \
-- +- <13> -- <19> -- <20> -- <21>
--
INSERT INTO revision (revision_id, username, ts, parent_revision_id) VALUES
(0, 'root', now(), 0) -- referencing itself
,(1, 'someone', now(), 0)
,(2, 'someone', now(), 1)
,(3, 'someone', now(), 2)
,(4, 'someone', now(), 3)
,(5, 'someone', now(), 3)
,(6, 'someone', now(), 5)
,(7, 'someone', now(), 6)
,(8, 'someone', now(), 4)
,(9, 'someone', now(), 8)
,(10,'someone', now(), 9)
,(11,'someone', now(), 9)
,(12,'someone', now(), 7)
,(13,'someone', now(), 12)
,(14,'someone', now(), 12)
,(15,'someone', now(), 11)
,(16,'someone', now(), 15)
,(17,'someone', now(), 16)
,(18,'someone', now(), 14)
,(19,'someone', now(), 13)
,(20,'someone', now(), 19)
,(21,'someone', now(), 20);
ANALYZE revision;
-- Create a revision snapsnot
select create_revision_snapshot(13);
ANALYZE revision_snapshot;
Run Code Online (Sandbox Code Playgroud)
称呼:
现在这个查询应该更快:
SELECT * FROM revision_tree_perf(21);
Run Code Online (Sandbox Code Playgroud)
..比这个:
SELECT * FROM revision_tree(21);
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
1938 次 |
| 最近记录: |