Mar*_*esi 2 postgresql performance optimization view postgresql-9.4 postgresql-performance
我在一个视图中有一个非常大的查询(我们称之为 a_sql),它的执行速度非常快,除非我在外层 SELECT 中同时使用 ORDER BY 和一个较小的 LIMIT:
-- Body of the view referred to as a_sql in the surrounding text.
-- Produces one row per business_orders.id (see GROUP BY) carrying sums of
-- order_rows.quantity, split by who placed the order and by orders.bulk,
-- plus descriptive columns taken from the joined tables.
SELECT
-- NOTE(review): the customs.* columns below are neither aggregated nor listed in GROUP BY — confirm PostgreSQL accepts this as written.
customs.id AS custom_id, customs.custom_name AS custom_name, customs.slug AS slug, customs.use_case AS custom_use_case,
-- sale_*: quantities from orders whose user_id equals the designer's id.
SUM(CASE WHEN designers.id = orders.user_id AND orders.bulk = 't' THEN order_rows.quantity ELSE 0 END) AS sale_bulk,
SUM(CASE WHEN designers.id = orders.user_id AND orders.bulk = 'f' THEN order_rows.quantity ELSE 0 END) AS sale_not_bulk,
SUM(CASE WHEN designers.id = orders.user_id THEN order_rows.quantity ELSE 0 END) AS sale_total,
-- buy_*: quantities from orders whose user_id differs from the designer's id.
SUM(CASE WHEN designers.id <> orders.user_id AND orders.bulk = 't' THEN order_rows.quantity ELSE 0 END) AS buy_bulk,
SUM(CASE WHEN designers.id <> orders.user_id AND orders.bulk = 'f' THEN order_rows.quantity ELSE 0 END) AS buy_not_bulk,
SUM(CASE WHEN designers.id <> orders.user_id THEN order_rows.quantity ELSE 0 END) AS buy_total,
-- total_*: quantities split by orders.bulk only, regardless of who ordered.
SUM(CASE orders.bulk WHEN 't' THEN order_rows.quantity ELSE 0 END) AS total_bulk,
SUM(CASE orders.bulk WHEN 'f' THEN order_rows.quantity ELSE 0 END) AS total_not_bulk,
-- COALESCE turns the NULL that SUM yields when every quantity in the group is NULL into 0.
COALESCE(SUM(order_rows.quantity), 0 ) AS total,
-- MIN() is used to reduce each descriptive column to a single value per group.
MIN(shoes.id) AS shoe_id,
MIN(shoe_models.id) AS shoe_model_id, MIN(shoe_models.name) AS shoe_model_name, MIN(shoe_models.title) AS shoe_model_title,
MIN(model_categories.id) AS model_category_id, MIN(model_categories.name) AS model_category_name,
MIN(business_orders.id) AS business_order_id, MIN(business_orders.state) AS business_order_state, MIN(business_orders.published_at) AS business_order_published_at,
MIN(designers.id) AS designer_id, MIN(designers.email) AS designer_email, MIN(designer_details.first_name) AS designer_first_name, MIN(designer_details.last_name) AS designer_last_name
FROM business_orders /* 10^6 rows */
LEFT JOIN users designers
ON designers.id = business_orders.user_id
/* 10^6 rows - business_orders has 0 or 1 users, users has n business_orders */
LEFT JOIN user_details designer_details
ON designers.id = designer_details.user_id
/* 10^6 rows - users has 0 or 1 user_details, user_details has 1 users */
INNER JOIN customs
ON business_orders.id = customs.business_order_id
/* 10^6 rows - business_orders has 1 customs, customs has 1 business_order */
LEFT JOIN shoes
ON shoes.product_id = customs.id
AND shoes.product_type = 'Custom'
/* 10^6 rows - customs has 1 shoes, shoes has 1 customs */
LEFT JOIN shoe_models
ON shoe_models.id = shoes.shoe_model_id
/* 10^2 rows - shoes has 1 shoe_models, shoe_models has n shoes */
LEFT JOIN model_categories
ON shoe_models.model_category_id = model_categories.id
/* 10^1 rows - shoe_models has 1 model_categories, model_categories has n models */
-- This inner join on shoes.id discards rows where the LEFT JOIN to shoes found no match.
INNER JOIN sizes
ON shoes.id = sizes.shoe_id
/* 10^6 rows - sizes has 1 shoes, shoes has n sizes */
LEFT JOIN order_rows
ON order_rows.article_id = sizes.id
-- NOTE(review): the cast is applied to the column itself; its declared type is not visible here.
AND order_rows.article_type::text = 'Size'::text
/* 10^5 rows - sizes has n order_rows, order_rows has 0 or 1 size */
LEFT JOIN orders
ON orders.id = order_rows.order_id
/* 10^4 rows - order_rows has 1 orders, orders has n order_rows */
-- Evaluated after all joins: keeps rows whose order is in one of the listed states
-- and rows with no matching order (orders.id IS NULL); rows whose order is in any
-- other state are removed entirely.
WHERE orders.state IN ('funded', 'confirmed', 'paid', 'delivered'
,'production', 'produced', 'ready_to_ship'
, 'shipped')
OR orders.id IS NULL
GROUP BY business_orders.id
Run Code Online (Sandbox Code Playgroud)
该视图返回大约 52,000 行。
以下类型的查询在 12.728 毫秒内执行:
-- Fast case: there is no ORDER BY, so any 10 rows of the view may be returned.
SELECT *
FROM a_sql
LIMIT 10
Run Code Online (Sandbox Code Playgroud)
相关EXPLAIN
输出:
-- NOTE(review): this statement is a verbatim repeat of the a_sql view body shown
-- earlier in this page. The preceding label announces EXPLAIN output, but no plan
-- text appears in this position.
SELECT
customs.id AS custom_id, customs.custom_name AS custom_name, customs.slug AS slug, customs.use_case AS custom_use_case,
SUM(CASE WHEN designers.id = orders.user_id AND orders.bulk = 't' THEN order_rows.quantity ELSE 0 END) AS sale_bulk,
SUM(CASE WHEN designers.id = orders.user_id AND orders.bulk = 'f' THEN order_rows.quantity ELSE 0 END) AS sale_not_bulk,
SUM(CASE WHEN designers.id = orders.user_id THEN order_rows.quantity ELSE 0 END) AS sale_total,
SUM(CASE WHEN designers.id <> orders.user_id AND orders.bulk = 't' THEN order_rows.quantity ELSE 0 END) AS buy_bulk,
SUM(CASE WHEN designers.id <> orders.user_id AND orders.bulk = 'f' THEN order_rows.quantity ELSE 0 END) AS buy_not_bulk,
SUM(CASE WHEN designers.id <> orders.user_id THEN order_rows.quantity ELSE 0 END) AS buy_total,
SUM(CASE orders.bulk WHEN 't' THEN order_rows.quantity ELSE 0 END) AS total_bulk,
SUM(CASE orders.bulk WHEN 'f' THEN order_rows.quantity ELSE 0 END) AS total_not_bulk,
COALESCE(SUM(order_rows.quantity), 0 ) AS total,
MIN(shoes.id) AS shoe_id,
MIN(shoe_models.id) AS shoe_model_id, MIN(shoe_models.name) AS shoe_model_name, MIN(shoe_models.title) AS shoe_model_title,
MIN(model_categories.id) AS model_category_id, MIN(model_categories.name) AS model_category_name,
MIN(business_orders.id) AS business_order_id, MIN(business_orders.state) AS business_order_state, MIN(business_orders.published_at) AS business_order_published_at,
MIN(designers.id) AS designer_id, MIN(designers.email) AS designer_email, MIN(designer_details.first_name) AS designer_first_name, MIN(designer_details.last_name) AS designer_last_name
FROM business_orders /* 10^6 rows */
LEFT JOIN users designers
ON designers.id = business_orders.user_id
/* 10^6 rows - business_orders has 0 or 1 users, users has n business_orders */
LEFT JOIN user_details designer_details
ON designers.id = designer_details.user_id
/* 10^6 rows - users has 0 or 1 user_details, user_details has 1 users */
INNER JOIN customs
ON business_orders.id = customs.business_order_id
/* 10^6 rows - business_orders has 1 customs, customs has 1 business_order */
LEFT JOIN shoes
ON shoes.product_id = customs.id
AND shoes.product_type = 'Custom'
/* 10^6 rows - customs has 1 shoes, shoes has 1 customs */
LEFT JOIN shoe_models
ON shoe_models.id = shoes.shoe_model_id
/* 10^2 rows - shoes has 1 shoe_models, shoe_models has n shoes */
LEFT JOIN model_categories
ON shoe_models.model_category_id = model_categories.id
/* 10^1 rows - shoe_models has 1 model_categories, model_categories has n models */
INNER JOIN sizes
ON shoes.id = sizes.shoe_id
/* 10^6 rows - sizes has 1 shoes, shoes has n sizes */
LEFT JOIN order_rows
ON order_rows.article_id = sizes.id
AND order_rows.article_type::text = 'Size'::text
/* 10^5 rows - sizes has n order_rows, order_rows has 0 or 1 size */
LEFT JOIN orders
ON orders.id = order_rows.order_id
/* 10^4 rows - order_rows has 1 orders, orders has n order_rows */
WHERE orders.state IN ('funded', 'confirmed', 'paid', 'delivered'
,'production', 'produced', 'ready_to_ship'
, 'shipped')
OR orders.id IS NULL
GROUP BY business_orders.id
相反,以下类型的查询在 9090.141 毫秒内执行
-- Slow case: the outer ORDER BY requires all rows of the view to be produced
-- and sorted before the first 10 can be returned.
SELECT *
FROM a_sql
ORDER BY custom_id
LIMIT 10
Run Code Online (Sandbox Code Playgroud)
相关EXPLAIN
输出:
Limit (cost=328570.62..328570.64 rows=10 width=324) (actual time=8987.928..8987.929 rows=10 loops=1)
Buffers: shared hit=10412 read=12400, temp read=18319 written=18063
-> Sort (cost=328570.62..328700.40 rows=51911 width=324) (actual time=8987.926..8987.926 rows=10 loops=1)
Sort Key: x.business_order_id
Sort Method: top-N heapsort Memory: 27kB
Buffers: shared hit=10412 read=12400, temp read=18319 written=18063
-> Subquery Scan on x (cost=306105.20..327448.84 rows=51911 width=324) (actual time=3074.397..8978.470 rows=8004 loops=1)
Buffers: shared hit=10412 read=12400, temp read=18319 written=18063
-> GroupAggregate (cost=306105.20..326929.73 rows=51911 width=610) (actual time=3074.395..8975.492 rows=8004 loops=1)
Group Key: business_orders.id
Buffers: shared hit=10412 read=12400, temp read=18319 written=18063
-> Sort (cost=306105.20..306782.04 rows=270739 width=610) (actual time=3073.679..3411.919 rows=467218 loops=1)
Sort Key: business_orders.id
Sort Method: external merge Disk: 56936kB
Buffers: shared hit=10412 read=12400, temp read=18319 written=18063
-> Hash Right Join (cost=98065.48..133611.68 rows=270739 width=610) (actual time=1559.328..2325.275 rows=467218 loops=1)
Hash Cond: (order_rows.article_id = sizes.id)
Filter: (((orders.state)::text = ANY ('{funded,confirmed,paid,delivered,production,produced,ready_to_ship,shipped}'::text[])) OR (orders.id IS NULL))
Rows Removed by Filter: 3712
Buffers: shared hit=10412 read=12400, temp read=9442 written=9186
-> Hash Left Join (cost=813.00..1497.05 rows=7367 width=26) (actual time=9.566..22.691 rows=7367 loops=1)
Hash Cond: (order_rows.order_id = orders.id)
Buffers: shared hit=888
-> Seq Scan on order_rows (cost=0.00..509.08 rows=7367 width=12) (actual time=0.029..5.732 rows=7367 loops=1)
Filter: ((article_type)::text = 'Size'::text)
Rows Removed by Filter: 11199
Buffers: shared hit=277
-> Hash (cost=700.78..700.78 rows=8978 width=18) (actual time=9.507..9.507 rows=8993 loops=1)
Buckets: 1024 Batches: 1 Memory Usage: 470kB
Buffers: shared hit=611
-> Seq Scan on orders (cost=0.00..700.78 rows=8978 width=18) (actual time=0.009..7.142 rows=8993 loops=1)
Buffers: shared hit=611
-> Hash (cost=57087.20..57087.20 rows=448022 width=605) (actual time=1547.263..1547.263 rows=469413 loops=1)
Buckets: 1024 Batches: 128 Memory Usage: 567kB
Buffers: shared hit=9524 read=12400, temp read=1037 written=8932
-> Hash Left Join (cost=30955.54..57087.20 rows=448022 width=605) (actual time=496.442..1160.554 rows=469413 loops=1)
Hash Cond: (shoes.shoe_model_id = shoe_models.id)
Buffers: shared hit=9524 read=12400, temp read=1037 written=1035
-> Hash Join (cost=30938.67..52547.10 rows=448022 width=69) (actual time=496.300..964.720 rows=469413 loops=1)
Hash Cond: (sizes.shoe_id = shoes.id)
Buffers: shared hit=9509 read=12400, temp read=1037 written=1035
-> Seq Scan on sizes (cost=0.00..8642.10 rows=441710 width=8) (actual time=0.009..119.758 rows=441934 loops=1)
Buffers: shared hit=797 read=3428
-> Hash (cost=29664.25..29664.25 rows=52594 width=65) (actual time=496.056..496.056 rows=54329 loops=1)
Buckets: 4096 Batches: 2 Memory Usage: 2679kB
Buffers: shared hit=8712 read=8972, temp written=294
-> Hash Left Join (cost=15725.17..29664.25 rows=52594 width=65) (actual time=162.077..460.095 rows=54329 loops=1)
Hash Cond: (designers.id = designer_details.user_id)
Buffers: shared hit=8712 read=8972
-> Hash Join (cost=11607.65..22688.39 rows=51831 width=52) (actual time=124.442..362.315 rows=51846 loops=1)
Hash Cond: (customs.id = shoes.product_id)
Buffers: shared hit=6055 read=8972
-> Hash Left Join (cost=7908.32..17952.45 rows=51831 width=48) (actual time=83.756..251.381 rows=51846 loops=1)
Hash Cond: (business_orders.user_id = designers.id)
Buffers: shared hit=3652 read=8972
-> Hash Join (cost=1843.00..10720.93 rows=51831 width=26) (actual time=27.942..139.640 rows=51846 loops=1)
Hash Cond: (customs.business_order_id = business_orders.id)
Buffers: shared hit=3079 read=4919
-> Seq Scan on customs (cost=0.00..7841.31 rows=51831 width=8) (actual time=0.009..41.084 rows=51846 loops=1)
Buffers: shared hit=2404 read=4919
-> Hash (cost=1194.11..1194.11 rows=51911 width=22) (actual time=27.888..27.888 rows=51849 loops=1)
Buckets: 8192 Batches: 1 Memory Usage: 2513kB
Buffers: shared hit=675
-> Seq Scan on business_orders (cost=0.00..1194.11 rows=51911 width=22) (actual time=0.007..15.422 rows=51849 loops=1)
Buffers: shared hit=675
-> Hash (cost=5265.70..5265.70 rows=63970 width=26) (actual time=55.788..55.788 rows=63972 loops=1)
Buckets: 8192 Batches: 1 Memory Usage: 3679kB
Buffers: shared hit=573 read=4053
-> Seq Scan on users designers (cost=0.00..5265.70 rows=63970 width=26) (actual time=0.003..35.227 rows=63972 loops=1)
Buffers: shared hit=573 read=4053
-> Hash (cost=3051.16..3051.16 rows=51853 width=12) (actual time=40.654..40.654 rows=51846 loops=1)
Buckets: 8192 Batches: 1 Memory Usage: 2154kB
Buffers: shared hit=2403
-> Seq Scan on shoes (cost=0.00..3051.16 rows=51853 width=12) (actual time=0.009..28.311 rows=51846 loops=1)
Filter: ((product_type)::text = 'Custom'::text)
Buffers: shared hit=2403
-> Hash (cost=3306.12..3306.12 rows=64912 width=17) (actual time=37.610..37.610 rows=64670 loops=1)
Buckets:
fil*_*rem 11
这是我在这种情况下所做的,通常其中一些有帮助:
查看整个查询并尝试从中删除不需要的表。
重新考虑外部 JOIN(即 LEFT/RIGHT JOIN),如果可能,将它们从视图定义中删除,替换为内部 JOIN。
尝试调高计划器参数,让服务器可以在计划阶段投入更多精力。具体做法是把 join_collapse_limit 和 from_collapse_limit 提高到 12,并把 geqo_threshold 提高到 18。
如果您知道哪种连接顺序最优,可以把 join_collapse_limit 降低到 1,并通过显式 JOIN 的书写顺序强制计划器采用该连接顺序。
您应该阅读有关Controlling the planner with explicit JOINs和Query Planning Configuration 的PostgreSQL 文档。
还有一种选择需要考虑:
WITH selection AS (SELECT DISTINCT ON (business_orders.id) business_orders.id FROM ... ORDER BY business_orders.id LIMIT 10)
但其中不使用聚合(如 count(*)、min、max、avg 等)。然后以它作为整个查询的基础,再在其上计算聚合。这样就可以避免在分组之前进行昂贵的计算。
Erw*_*ter 11
注意:此答案解决了几个基本问题,但不是最终解决方案。多次请求澄清后,问题仍然不一致,所以我停止了处理。
问题在于:谓词作用于某些列,而 ORDER BY 作用于另外的列。
在您的快速查询中,没有 ORDER BY
,则可以返回前(任意)10 行。
在您的慢查询中,使用ORDER BY
,必须考虑并排序所有行,然后才能返回前 10 行。这样的查询可以使用匹配的索引进行优化......
非常相似的案例:
让我们先直接了解您的查询。我消除了所有噪音以获得概览并修复了一些明显的问题:
-- Per business order and shoe: ordered quantities split into orders placed by the
-- business order's own user ("sale_*"), orders placed by other users ("buy_*") and
-- overall totals, plus descriptive columns for the shoe model, its category and
-- the designer.
--
-- The aggregation runs in "sub" over only the tables it needs; the descriptive
-- lookup tables are joined afterwards to the already-aggregated rows.
--
-- Columns are listed explicitly instead of SELECT * so that shoe_model_id appears
-- only once in the result (sub.shoe_model_id serves purely as a join key).
SELECT sub.sale_bulk
     , sub.sale_not_bulk
     , sub.sale_total
     , sub.buy_bulk
     , sub.buy_not_bulk
     , sub.buy_total
     , sub.total_bulk
     , sub.total_not_bulk
     , sub.total
     , sub.designer_id
     , sub.business_order_id
     , sub.business_order_state
     , sub.business_order_published_at
     , sub.shoe_id
     , sm.id         AS shoe_model_id
     , sm.name       AS shoe_model_name
     , sm.title      AS shoe_model_title
     , mc.id         AS model_category_id
     , mc.name       AS model_category_name
     , u.email       AS designer_email
     , ud.first_name AS designer_first_name
     , ud.last_name  AS designer_last_name
FROM  (
   -- Every sum is wrapped in COALESCE so a counter reads 0 rather than NULL when
   -- no order row satisfies its FILTER.
   -- NOTE(review): o.bulk is used as a boolean here — confirm the column type.
   SELECT sh.shoe_model_id
        , COALESCE(SUM(oro.quantity) FILTER (WHERE bo.user_id =  o.user_id AND o.bulk), 0)     AS sale_bulk
        , COALESCE(SUM(oro.quantity) FILTER (WHERE bo.user_id =  o.user_id AND NOT o.bulk), 0) AS sale_not_bulk
        , COALESCE(SUM(oro.quantity) FILTER (WHERE bo.user_id =  o.user_id), 0)                AS sale_total
        , COALESCE(SUM(oro.quantity) FILTER (WHERE bo.user_id <> o.user_id AND o.bulk), 0)     AS buy_bulk
        , COALESCE(SUM(oro.quantity) FILTER (WHERE bo.user_id <> o.user_id AND NOT o.bulk), 0) AS buy_not_bulk
        , COALESCE(SUM(oro.quantity) FILTER (WHERE bo.user_id <> o.user_id), 0)                AS buy_total
        , COALESCE(SUM(oro.quantity) FILTER (WHERE o.bulk), 0)                                 AS total_bulk
        , COALESCE(SUM(oro.quantity) FILTER (WHERE NOT o.bulk), 0)                             AS total_not_bulk
        , COALESCE(SUM(oro.quantity), 0)                                                       AS total
        , bo.user_id      AS designer_id
        , bo.id           AS business_order_id
        , bo.state        AS business_order_state
        , bo.published_at AS business_order_published_at
        , sh.id           AS shoe_id   -- sh.id is a grouping column, no aggregate needed
   FROM   shoes sh
   INNER JOIN customs         c  ON c.id = sh.product_id
   INNER JOIN business_orders bo ON bo.id = c.business_order_id
   INNER JOIN sizes           si ON si.shoe_id = sh.id
   -- orders and order_rows are joined and filtered first (note the parentheses),
   -- and only then outer-joined to sizes: an order row whose order is not in one
   -- of the listed states contributes nothing, but its size/shoe row is kept.
   LEFT JOIN (
             orders o
             INNER JOIN order_rows oro
                     ON oro.order_id = o.id
                    AND oro.article_type::text = 'Size'::text  -- data type ???
                    AND o.state IN ('funded', 'confirmed', 'paid', 'delivered'
                                  , 'production', 'produced', 'ready_to_ship', 'shipped')
             ) ON oro.article_id = si.id
   WHERE  sh.product_type = 'Custom'
   -- The ungrouped bo.* and sh.* columns above rely on bo.id and sh.id being the
   -- primary keys of their tables — assumed, confirm against the schema.
   GROUP  BY bo.id, sh.id
   ) sub
LEFT JOIN users            u  ON u.id = sub.designer_id
LEFT JOIN user_details     ud ON ud.user_id = u.id
LEFT JOIN shoe_models      sm ON sm.id = sub.shoe_model_id
LEFT JOIN model_categories mc ON mc.id = sm.model_category_id;
Run Code Online (Sandbox Code Playgroud)
由于 sizes 是通过 INNER JOIN 依赖于 shoes 的,对 shoes 使用 LEFT JOIN 没有意义(其效果等同于内连接)。请改用 [INNER] JOIN。
为什么要写 oro.article_type::text?不应该对列本身做类型转换(cast)。它的数据类型是什么?sh.product_type::text 同理。
您使用显式连接语法连接了 10 个表,这超过了 join_collapse_limit 的默认值 8。因此,以合理且正确的顺序书写连接就变得更加重要。查询计划器不会替您修正自相矛盾的 FROM 列表。
我只在相关表上运行(更新!)聚合,稍后加入其余表。
简化了聚合表达式——使用 Postgres 9.4 中新增的聚合 FILTER 子句。
比性能优化更重要的是,我怀疑您原始查询中的计算可能不正确:即使 orders 中对应的行不符合条件,您仍然会把 order_rows 中的行计入总和。我的修正办法是:先连接 orders 和 order_rows 并应用您的过滤条件,然后再(借助括号)把结果连接到其余的表。
归档时间: |
|
查看次数: |
1457 次 |
最近记录: |