使用 ROW_NUMBER() OVER (PARTITION BY ...) 时遇到问题

Thr*_*ian 14 sql sql-server row-number sql-server-2008 gaps-and-islands

我正在使用SQL Server 2008 R2.我有一个名为EmployeeHistory的表,其中包含以下结构和示例数据:

EmployeeID Date      DepartmentID SupervisorID
10001      20130101  001          10009
10001      20130909  001          10019
10001      20131201  002          10018
10001      20140501  002          10017
10001      20141001  001          10015
10001      20141201  001          10014
Run Code Online (Sandbox Code Playgroud)

请注意,员工 10001 先后在 2 个部门之间调动,并更换过多位主管。我想按日期字段的顺序,列出该员工在每个部门每一段任职的开始日期和结束日期。所以,输出应该是这样的:

EmployeeID DateStart DateEnd  DepartmentID 
10001      20130101  20131201 001
10001      20131201  20141001 002
10001      20141001  NULL     001
Run Code Online (Sandbox Code Playgroud)

我原本打算用下面的查询对数据进行分区,但没有成功:部门从 001 变为 002,之后又变回 001,因此显然不能直接按 DepartmentID 分区……我确信自己忽略了某个显而易见的东西。能帮帮我吗?先谢谢了。

-- Attempted query: number each employee's rows within a department by date.
-- Every row with the same (EmployeeID, DepartmentID) lands in one partition,
-- even when the employee left the department and later came back.
SELECT
    eh.*,
    ROW_NUMBER() OVER (
        PARTITION BY eh.EmployeeID, eh.DepartmentID
        ORDER BY eh.[Date]
    ) AS RN
FROM EmployeeHistory AS eh
Run Code Online (Sandbox Code Playgroud)

Tre*_*vor 9

我会做这样的事情:

-- Number each employee's history rows chronologically, then pair every row
-- (x1) with the row that immediately precedes it (x2) for the same employee.
-- The first row of each employee has no predecessor, so its x2 columns are NULL.
;WITH x
 AS (SELECT *,
            ROW_NUMBER()
              OVER(
                PARTITION BY EmployeeID
                ORDER BY [Date]) rn  -- EmployeeHistory's date column is [Date]
     FROM   EmployeeHistory)
SELECT *
FROM   x x1
   LEFT OUTER JOIN x x2
                -- rn restarts at 1 for every employee, so the join must also
                -- match on EmployeeID or rows of different employees pair up.
                ON x1.EmployeeID = x2.EmployeeID
               AND x1.rn = x2.rn + 1
Run Code Online (Sandbox Code Playgroud)

也可能应该是 x2.rn - 1,你需要自己试一下。总之,思路就是这样:把表与自身连接起来之后,就可以再进行过滤、分组、排序等操作,得到想要的结果。


Dom*_*c P 7

这个问题有点复杂。最简单的办法是参考我为你创建的 SQL Fiddle,它能得到与预期完全一致的结果。出于性能或其他方面的考虑,还有很多可以改进的地方,但它至少应该比某些替代方案更清晰。

要点是,首先要获得数据的规范排名,然后使用该排名将数据分为几组,然后为每个组找到结束日期,然后消除任何中间行。ROW_NUMBER()和CROSS APPLY在可读性方面有很大帮助。


编辑2019:

由于某些原因,SQL Fiddle 上的示例确实打不开了,不过这似乎是 SQL Fiddle 站点自身的问题。下面是完整的版本,刚刚在 SQL Server 2016 上测试过:

-- Demo table holding the employee history rows used by the rest of the script.
CREATE TABLE Source (
    EmployeeID   INT,
    DateStarted  DATE,
    DepartmentID INT
);

-- Load the sample history for employee 10001 (department 1 -> 2 -> 1).
-- The column list is explicit so the insert does not depend on the physical
-- column order of Source. DepartmentID is an INT, so 001/002 are stored as 1/2.
INSERT INTO Source (EmployeeID, DateStarted, DepartmentID)
VALUES
    (10001, '2013-01-01', 001),
    (10001, '2013-09-09', 001),
    (10001, '2013-12-01', 002),
    (10001, '2014-05-01', 002),
    (10001, '2014-10-01', 001),
    (10001, '2014-12-01', 001);


-- Stage the source rows in a temp table together with three working columns:
--   EntryRank : chronological position of the row within its employee
--   GroupKey  : a fresh NEWID() value per row
--   EndDate   : NULL placeholder typed as DATE
SELECT
    src.EmployeeID,
    src.DateStarted,
    src.DepartmentID,
    ROW_NUMBER() OVER (
        PARTITION BY src.EmployeeID
        ORDER BY src.DateStarted
    ) AS EntryRank,
    NEWID() AS GroupKey,
    CAST(NULL AS DATE) AS EndDate
INTO #RankedData
FROM Source AS src;

-- Give every row the GroupKey of the first row of its unbroken
-- same-department run, so all rows of one run end up sharing a key.
UPDATE cur
SET GroupKey = run_start.GroupKey
FROM #RankedData AS cur
CROSS APPLY (
    -- Earliest row of the same employee and department that has no row of a
    -- different department ranked between it and the current row.
    SELECT TOP 1 cand.GroupKey
    FROM #RankedData AS cand
    WHERE cand.EmployeeID = cur.EmployeeID
      AND cand.DepartmentID = cur.DepartmentID
      AND NOT EXISTS (
            SELECT *
            FROM #RankedData AS other
            WHERE other.EmployeeID = cur.EmployeeID
              AND other.DepartmentID <> cur.DepartmentID
              AND other.EntryRank BETWEEN cand.EntryRank AND cur.EntryRank
          )
    ORDER BY cand.DateStarted ASC
) AS run_start (GroupKey);

-- Set EndDate to the start date of the next row (by EntryRank) of the same
-- employee that belongs to a different department. Rows with no such later
-- row get no CROSS APPLY match, are not updated, and keep EndDate = NULL.
UPDATE cur
SET EndDate = next_dept.DateStarted
FROM #RankedData AS cur
CROSS APPLY (
    SELECT TOP 1 later.DateStarted
    FROM #RankedData AS later
    WHERE later.EmployeeID = cur.EmployeeID
      AND later.EntryRank > cur.EntryRank
      AND later.DepartmentID <> cur.DepartmentID
    ORDER BY later.EntryRank ASC
) AS next_dept (DateStarted);

-- Return one row per GroupKey: the row with the lowest EntryRank in its group.
-- Its DateStarted and EndDate columns are returned alongside the helper columns.
SELECT
    FinalRanking.EmployeeID,
    FinalRanking.DateStarted,
    FinalRanking.DepartmentID,
    FinalRanking.EntryRank,
    FinalRanking.GroupKey,
    FinalRanking.EndDate,
    FinalRanking.GroupRank
FROM (
    SELECT
        EmployeeID,
        DateStarted,
        DepartmentID,
        EntryRank,
        GroupKey,
        EndDate,
        ROW_NUMBER() OVER (PARTITION BY GroupKey ORDER BY EntryRank ASC) AS GroupRank
    FROM #RankedData
) AS FinalRanking
WHERE FinalRanking.GroupRank = 1
-- EntryRank restarts at 1 for every employee, so sort by employee first;
-- ordering by EntryRank alone would interleave different employees' rows.
ORDER BY FinalRanking.EmployeeID, FinalRanking.EntryRank;

-- Clean up: remove the working temp table, then the demo table.
DROP TABLE #RankedData;
DROP TABLE Source;
Run Code Online (Sandbox Code Playgroud)