chr*_*ris 10 mysql group-by distinct
我有两张这样的桌子.'order'表有21886行.
CREATE TABLE `order` (
`id` bigint(20) unsigned NOT NULL,
`reg_date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
KEY `idx_reg_date` (`reg_date`),
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
CREATE TABLE `order_detail_products` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`order_id` bigint(20) unsigned NOT NULL,
`order_detail_id` int(11) NOT NULL,
`prod_id` int(11) NOT NULL,
PRIMARY KEY (`id`),
KEY `idx_order_detail_id` (`order_detail_id`,`prod_id`),
KEY `idx_order_id` (`order_id`,`order_detail_id`,`prod_id`)
) ENGINE=InnoDB AUTO_INCREMENT=572375 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
Run Code Online (Sandbox Code Playgroud)
我的问题在这里.
MariaDB [test]> explain
-> SELECT DISTINCT A.id
-> FROM order A
-> JOIN order_detail_products B ON A.id = B.order_id
-> ORDER BY A.reg_date DESC LIMIT 100, 30;
+------+-------------+-------+-------+---------------+--------------+---------+-------------------+-------+----------------------------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+------+-------------+-------+-------+---------------+--------------+---------+-------------------+-------+----------------------------------------------+
| 1 | SIMPLE | A | index | PRIMARY | idx_reg_date | 8 | NULL | 22151 | Using index; Using temporary; Using filesort |
| 1 | SIMPLE | B | ref | idx_order_id | idx_order_id | 8 | bom_20140804.A.id | 2 | Using index; Distinct |
+------+-------------+-------+-------+---------------+--------------+---------+-------------------+-------+----------------------------------------------+
2 rows in set (0.00 sec)
MariaDB [test]> explain
-> SELECT A.id
-> FROM order A
-> JOIN order_detail_products B ON A.id = B.order_id
-> GROUP BY A.id
-> ORDER BY A.reg_date DESC LIMIT 100, 30;
+------+-------------+-------+-------+---------------+--------------+---------+-------------------+------+------------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+------+-------------+-------+-------+---------------+--------------+---------+-------------------+------+------------------------------+
| 1 | SIMPLE | A | index | PRIMARY | idx_reg_date | 8 | NULL | 65 | Using index; Using temporary |
| 1 | SIMPLE | B | ref | idx_order_id | idx_order_id | 8 | bom_20140804.A.id | 2 | Using index |
+------+-------------+-------+-------+---------------+--------------+---------+-------------------+------+------------------------------+
Run Code Online (Sandbox Code Playgroud)
上面列出的是,两个查询返回相同的结果但不同的结果太慢(解释太多行).有什么不同?
通常建议使用DISTINCT而不是GROUP BY,因为那是您真正想要的,并让优化器选择“最佳”执行计划。然而,没有一个优化器是完美的。使用DISTINCT优化器可以让执行计划有更多的选择。但这也意味着它有更多的选择来选择糟糕的计划。
您写道查询DISTINCT“慢”,但没有告诉任何数字。在我的测试中( MariaDB 10.0.19和10.3.13上的行数是其 10 倍),DISTINCT查询速度(仅)慢了 25%(562ms/453ms)。结果EXPLAIN一点帮助都没有。甚至是“撒谎”。使用LIMIT 100, 30它需要读取至少 130 行(这就是我EXPLAIN实际显示的GROUP BY),但它显示了 65 行。
我无法解释执行时间上 25% 的差异,但似乎引擎在任何情况下都会进行完整的表/索引扫描,并对结果进行排序,然后才能跳过 100 行并选择 30 行。
最好的计划可能是:
idx_reg_date索引(表)中逐行读取行Aidx_order_id查看索引(表B)中是否有匹配项如果其中大约 10% 的行在 中A没有匹配项B,则此计划将从 中读取类似 143 行的内容A。
为了以某种方式强制实施这个计划,我能做的最好的事情是:
SELECT A.id
FROM `order` A
WHERE EXISTS (SELECT * FROM order_detail_products B WHERE A.id = B.order_id)
ORDER BY A.reg_date DESC
LIMIT 30
OFFSET 100
Run Code Online (Sandbox Code Playgroud)
此查询在 156 毫秒内返回相同的结果(比 快 3 倍GROUP BY)。但这仍然太慢。它可能仍在读取 table 中的所有行A。
我们可以证明通过一个“小”子查询技巧可以存在更好的计划:
SELECT A.id
FROM (
SELECT id, reg_date
FROM `order`
ORDER BY reg_date DESC
LIMIT 1000
) A
WHERE EXISTS (SELECT * FROM order_detail_products B WHERE A.id = B.order_id)
ORDER BY A.reg_date DESC
LIMIT 30
OFFSET 100
Run Code Online (Sandbox Code Playgroud)
此查询在“无时间”(~ 0 毫秒)内执行,并在我的测试数据上返回相同的结果。虽然它不是 100% 可靠,但它表明优化器做得不好。
那么我的结论是什么:
DISTINCT并不总是比GROUP BYdrop table if exists `order`;
CREATE TABLE `order` (
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
`reg_date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
KEY `idx_reg_date` (`reg_date`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
insert into `order`(reg_date)
select from_unixtime(floor(rand(1) * 1000000000)) as reg_date
from information_schema.COLUMNS a
, information_schema.COLUMNS b
limit 218860;
drop table if exists `order_detail_products`;
CREATE TABLE `order_detail_products` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`order_id` bigint(20) unsigned NOT NULL,
`order_detail_id` int(11) NOT NULL,
`prod_id` int(11) NOT NULL,
PRIMARY KEY (`id`),
KEY `idx_order_detail_id` (`order_detail_id`,`prod_id`),
KEY `idx_order_id` (`order_id`,`order_detail_id`,`prod_id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
insert into order_detail_products(id, order_id, order_detail_id, prod_id)
select null as id
, floor(rand(2)*218860)+1 as order_id
, 0 as order_detail_id
, 0 as prod_id
from information_schema.COLUMNS a
, information_schema.COLUMNS b
limit 437320;
Run Code Online (Sandbox Code Playgroud)
SELECT DISTINCT A.id
FROM `order` A
JOIN order_detail_products B ON A.id = B.order_id
ORDER BY A.reg_date DESC
LIMIT 30 OFFSET 100;
-- 562 ms
SELECT A.id
FROM `order` A
JOIN order_detail_products B ON A.id = B.order_id
GROUP BY A.id
ORDER BY A.reg_date DESC
LIMIT 30 OFFSET 100;
-- 453 ms
SELECT A.id
FROM `order` A
WHERE EXISTS (SELECT * FROM order_detail_products B WHERE A.id = B.order_id)
ORDER BY A.reg_date DESC
LIMIT 30 OFFSET 100;
-- 156 ms
SELECT A.id
FROM (
SELECT id, reg_date
FROM `order`
ORDER BY reg_date DESC
LIMIT 1000
) A
WHERE EXISTS (SELECT * FROM order_detail_products B WHERE A.id = B.order_id)
ORDER BY A.reg_date DESC
LIMIT 30 OFFSET 100;
-- ~ 0 ms
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
7874 次 |
| 最近记录: |