MLE*_*LEN 5 sql sql-server join
我有一个包含 1000 万行的表,想根据日期以及机器的状态,找出每台机器(id)的第一个/最后一个维护者。我的查询用了六个连接,有没有更好的写法?编辑:原始表上已有索引,我想优化查询,如果可能的话替换掉这些连接。SQL Fiddle 示例:
编辑(在下面添加了其他信息):
示例表:
-- Sample maintenance log used by the queries in this post.
-- No key or constraint is declared here, so nothing prevents several rows
-- from sharing the same (id, datestamp).
CREATE TABLE vendor_info
(
    id          INT,           -- machine identifier
    datestamp   INT,           -- date kept as an integer; sample values look like yyyymmdd (20180101)
    statuz      INT,           -- machine status; the sample data uses 0 and 1
    maintainer  VARCHAR(25)    -- maintainer recorded on that row
);
-- Seed data: machines 1-3, each with one row for 20180101, 20180201 and
-- 20180301. The column list is spelled out so the load does not depend on
-- the physical column order of vendor_info.
INSERT INTO vendor_info (id, datestamp, statuz, maintainer)
VALUES
    (1, 20180101, 0, 'Jay'),
    (2, 20180101, 0, 'Eric'),
    (3, 20180101, 1, 'David'),
    (1, 20180201, 1, 'Jay'),
    (2, 20180201, 0, 'Jay'),
    (3, 20180201, 1, 'Jay'),
    (1, 20180301, 1, 'Jay'),
    (2, 20180301, 1, 'David'),
    (3, 20180301, 1, 'Eric');
Run Code Online (Sandbox Code Playgroud)
查询和所需的输出:
-- Step 1: for every machine id, store the earliest and latest datestamp,
-- overall and separately for status 0 and status 1, in a new table
-- vendor_dates (SELECT ... INTO creates it).
-- Each CASE yields NULL for rows of the other status and MIN/MAX skip NULLs,
-- so an id that never had a given status gets NULL in that column.
SELECT
    id,
    MIN(datestamp)                             AS min_datestamp,
    MAX(datestamp)                             AS max_datestamp,
    MAX(CASE statuz WHEN 0 THEN datestamp END) AS max_s0_date,
    MAX(CASE statuz WHEN 1 THEN datestamp END) AS max_s1_date,
    MIN(CASE statuz WHEN 0 THEN datestamp END) AS min_s0_date,
    MIN(CASE statuz WHEN 1 THEN datestamp END) AS min_s1_date
INTO vendor_dates
FROM vendor_info
GROUP BY id;
-- Step 2: for each id in vendor_dates, fetch the maintainer recorded on each
-- of the six boundary dates. LEFT JOINs keep ids whose boundary date is NULL
-- (status never seen): NULL never equals a datestamp, so the matching
-- maintainer column is returned as NULL.
--
-- The four status-specific joins also filter on statuz. Matching on
-- (id, datestamp) alone is only safe when that pair is unique; without the
-- filter a same-date row with the other status could be returned under the
-- wrong column.
-- NOTE(review): rows that duplicate (id, datestamp) with the same status
-- would still produce more than one output row per id.
SELECT
    vd.id,
    first_any.maintainer AS first_maintainer,
    last_any.maintainer  AS last_maintainer,
    last_s0.maintainer   AS last_s0_maintainer,
    last_s1.maintainer   AS last_s1_maintainer,
    first_s0.maintainer  AS first_s0_maintainer,
    first_s1.maintainer  AS first_s1_maintainer
FROM vendor_dates AS vd
LEFT JOIN vendor_info AS first_any
    ON  first_any.id = vd.id
    AND first_any.datestamp = vd.min_datestamp
LEFT JOIN vendor_info AS last_any
    ON  last_any.id = vd.id
    AND last_any.datestamp = vd.max_datestamp
LEFT JOIN vendor_info AS last_s0
    ON  last_s0.id = vd.id
    AND last_s0.datestamp = vd.max_s0_date
    AND last_s0.statuz = 0
LEFT JOIN vendor_info AS last_s1
    ON  last_s1.id = vd.id
    AND last_s1.datestamp = vd.max_s1_date
    AND last_s1.statuz = 1
LEFT JOIN vendor_info AS first_s0
    ON  first_s0.id = vd.id
    AND first_s0.datestamp = vd.min_s0_date
    AND first_s0.statuz = 0
LEFT JOIN vendor_info AS first_s1
    ON  first_s1.id = vd.id
    AND first_s1.datestamp = vd.min_s1_date
    AND first_s1.statuz = 1;
Run Code Online (Sandbox Code Playgroud)
Adding an index (a clustered primary key) to vendor_info reduces the duration of your second query from over 300 ms to under 30 ms, averaged over repeated runs:
PRIMARY KEY CLUSTERED (id, datestamp)
Changing the two-step process into a single query with a CTE reduces the total duration even further, to well under 15 ms on average over repeated runs.
The CTE method lets the query optimiser use the new primary key:
-- Sample maintenance log, keyed on (id, datestamp).
-- The clustered primary key makes each (id, datestamp) pair unique and gives
-- the self-joins on those two columns an index to use.
CREATE TABLE vendor_info (
    id          INT          NOT NULL,  -- machine identifier (key column)
    datestamp   INT          NOT NULL,  -- date kept as an integer; sample values look like yyyymmdd
    statuz      INT,                    -- machine status; the sample data uses 0 and 1
    maintainer  VARCHAR(25),            -- maintainer recorded on that row
    -- Table-level constraint: separated from the last column by a comma and
    -- given an explicit name so errors and later migrations can refer to it.
    CONSTRAINT vendor_info_pk PRIMARY KEY CLUSTERED (id, datestamp)
);
-- Seed data: machines 1-3, each with one row for 20180101, 20180201 and
-- 20180301, so every (id, datestamp) pair is distinct.
-- The column list is spelled out so the load does not depend on the
-- physical column order of vendor_info.
INSERT INTO vendor_info (id, datestamp, statuz, maintainer)
VALUES
    (1, 20180101, 0, 'Jay'),
    (2, 20180101, 0, 'Eric'),
    (3, 20180101, 1, 'David'),
    (1, 20180201, 1, 'Jay'),
    (2, 20180201, 0, 'Jay'),
    (3, 20180201, 1, 'Jay'),
    (1, 20180301, 1, 'Jay'),
    (2, 20180301, 1, 'David'),
    (3, 20180301, 1, 'Eric');
-- Single-statement version of the two-step query.
-- vendor_dates: per machine id, the earliest and latest datestamp overall
-- and per status (0 / 1). A status the id never had yields NULL, because the
-- CASE returns NULL for non-matching rows and MIN/MAX skip NULLs.
WITH vendor_dates AS (
    SELECT
        id,
        MIN(datestamp)                             AS min_datestamp,
        MAX(datestamp)                             AS max_datestamp,
        MAX(CASE statuz WHEN 0 THEN datestamp END) AS max_s0_date,
        MAX(CASE statuz WHEN 1 THEN datestamp END) AS max_s1_date,
        MIN(CASE statuz WHEN 0 THEN datestamp END) AS min_s0_date,
        MIN(CASE statuz WHEN 1 THEN datestamp END) AS min_s1_date
    FROM vendor_info
    GROUP BY id
)
-- Join back to vendor_info once per boundary date to read the maintainer on
-- that row. LEFT JOINs keep ids whose boundary date is NULL; the matching
-- maintainer column is then NULL.
SELECT
    vd.id,
    first_any.maintainer AS first_maintainer,
    last_any.maintainer  AS last_maintainer,
    last_s0.maintainer   AS last_s0_maintainer,
    last_s1.maintainer   AS last_s1_maintainer,
    first_s0.maintainer  AS first_s0_maintainer,
    first_s1.maintainer  AS first_s1_maintainer
FROM vendor_dates AS vd
LEFT JOIN vendor_info AS first_any
    ON  first_any.id = vd.id
    AND first_any.datestamp = vd.min_datestamp
LEFT JOIN vendor_info AS last_any
    ON  last_any.id = vd.id
    AND last_any.datestamp = vd.max_datestamp
LEFT JOIN vendor_info AS last_s0
    ON  last_s0.id = vd.id
    AND last_s0.datestamp = vd.max_s0_date
LEFT JOIN vendor_info AS last_s1
    ON  last_s1.id = vd.id
    AND last_s1.datestamp = vd.max_s1_date
LEFT JOIN vendor_info AS first_s0
    ON  first_s0.id = vd.id
    AND first_s0.datestamp = vd.min_s0_date
LEFT JOIN vendor_info AS first_s1
    ON  first_s1.id = vd.id
    AND first_s1.datestamp = vd.min_s1_date;
Run Code Online (Sandbox Code Playgroud)