在一组重叠的版本编号区间中,在每个时间点找到最新版本

Exc*_*dez 5 sql sql-server mathematical-optimization intervals

我正在使用一组日期间隔,其中每个间隔都有一个版本号,新的间隔经常与旧的间隔重叠,甚至是它们的子集.根据这些数据,我需要计算一组新的时间间隔,以显示每个时间点的最新版本号.是否有针对此问题的基于集合的解决方案?

这是一个例子:

Interval 1: 11111111111111111111111      
Interval 2:     2222222222               
Interval 3:   33333333333333             
Interval 4:                     444444444
Interval 5:                   555555555  
Result    : 11333333333333331155555555544
Run Code Online (Sandbox Code Playgroud)

以下是我正在使用的数据示例:

groupId   startDate  endDate     version
--------  ---------  ----------  ------
1         1/1/2010   1/1/2011    1
1         10/1/2010  7/5/2011    2
1         7/5/2011   8/13/2012   3
1         8/13/2012  12/31/2012  6
1         10/1/2012  11/1/2012   8
Run Code Online (Sandbox Code Playgroud)

......和期望的输出:

groupId   startDate  endDate     version
--------  ---------  ----------  ------
1         1/1/2010   10/1/2010   1
1         10/1/2010  7/5/2011    2
1         7/5/2011   8/13/2012   3
1         8/13/2011  10/1/2012   6
1         10/1/2012  11/1/2012   8 << note how version 8 supersedes version 6
1         11/1/2012  12/31/2012  6 << version 6 is split into two records
Run Code Online (Sandbox Code Playgroud)

我还没有找到任何其他这个问题的例子,我的谷歌搜索只会查找识别间隙和岛屿覆盖集合的查询.

我想我有一个迭代解决方案(SQL Server 2008).它以结果集中的间隔的临时表开始,并通过插入具有特殊版本号的记录来定义我们要覆盖的范围的起点和终点.然后,它重复识别结果集间隔之间的间隙,并尝试使用原始数据集中的最新记录填充它们,直到没有更多间隙或不再添加记录:

GO
-- Create data set and results table
CREATE TABLE #Data (
     groupId    INT
    ,startDate  DATE
    ,endDate    DATE
    ,versionId  INT
)

INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2007-12-22', '2008-12-22', 8)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2008-12-22', '2009-12-22', 9)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2009-12-22', '2010-12-22', 10)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2010-12-22', '2011-12-22', 11)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2011-01-01', '2011-11-30', 500)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2011-12-22', '2012-12-22', 12)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2012-01-22', '2012-12-22', 13)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2012-01-22', '2012-12-22', 14)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2012-04-22', '2012-12-22', 17)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2012-04-22', '2012-12-22', 19)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (2, '2010-01-01', '2011-01-01', 1)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (2, '2010-10-01', '2011-07-05', 2)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (2, '2011-07-05', '2012-08-13', 3)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (2, '2012-08-13', '2012-12-31', 6)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (2, '2012-10-01', '2012-11-01', 8)


CREATE TABLE #Results (
     groupId        VARCHAR(10)
    ,startDate  DATE
    ,endDate    DATE
    ,versionId      BIGINT
)

DECLARE @startDate      DATE
DECLARE @endDate        DATE
DECLARE @placeholderId  BIGINT

SET @startDate = '20030101'
SET @endDate = '20121231'
SET @placeholderId = 999999999999999

INSERT #Results
SELECT DISTINCT
     groupId
    ,CASE WHEN MIN(startDate) < @startDate THEN MIN(startDate) ELSE @startDate END
    ,CASE WHEN MIN(startDate) < @startDate THEN @startDate ELSE MIN(startDate) END
    ,@placeholderId
FROM #data
GROUP BY groupId
UNION ALL
SELECT DISTINCT
     groupId
    ,CASE WHEN MAX(endDate) < @endDate THEN MAX(endDate) ELSE @endDate END
    ,CASE WHEN MAX(endDate) < @endDate THEN @endDate ELSE MAX(endDate) END
    ,@placeholderId
FROM #data
GROUP BY groupId
GO

-- Fill gaps in results table
DECLARE @startDate      DATE
DECLARE @endDate        DATE
DECLARE @placeholderId  BIGINT

SET @startDate = '20030101'
SET @endDate = '20111231'
SET @placeholderId = 999999999999999

DECLARE @counter INT
SET @counter = 0

WHILE @counter < 10
BEGIN
    SET @counter = @counter + 1;
    WITH Gaps AS (
        SELECT
             gs.groupId
            ,gs.startDate
            ,MIN(ge.endDate) as endDate
            ,ROW_NUMBER() OVER (ORDER BY gs.groupId, gs.startDate) as gapId
        FROM (
            SELECT groupId, endDate as startDate
            FROM #Results r1 
            WHERE NOT EXISTS (
                    SELECT * 
                    FROM #Results r2 
                    WHERE r2.groupId = r1.groupId
                        AND r2.versionId <> r1.versionId
                        AND r2.startDate <= r1.endDate
                        AND r2.endDate > r1.endDate
                )
                AND NOT (endDate >= @endDate AND versionId = @placeholderId)
        ) gs
        INNER JOIN (
            SELECT groupId, startDate as endDate
            FROM #Results r1 
            WHERE NOT EXISTS (
                    SELECT * 
                    FROM #Results r2 
                    WHERE r2.groupId = r1.groupId
                        AND r2.versionId <> r1.versionId
                        AND r2.endDate >= r1.startDate
                        AND r2.startDate < r1.startDate
                )
                AND NOT (startDate <= @startDate AND versionId = @placeholderId)
        ) ge
            ON ge.groupId = gs.groupId
            AND ge.endDate >= gs.startDate
        GROUP BY gs.groupId, gs.startDate
    )
    INSERT #Results (
         groupId
        ,startDate
        ,endDate
        ,versionId
    )
    SELECT
         d.groupId
        ,CASE WHEN d.startDate < g.startDate THEN g.startDate ELSE d.startDate END
        ,CASE WHEN d.endDate > g.endDate THEN g.endDate ELSE d.endDate END
        ,d.versionId
    FROM #Data d
    INNER JOIN Gaps g
        ON g.groupId = d.groupId
        AND g.startDate <= d.endDate
        AND g.endDate >= d.startDate
    INNER JOIN (
        SELECT 
             d.groupId
            ,gapId
            ,MAX(d.versionId) as versionId
        FROM #Data d
        INNER JOIN Gaps g
            ON g.groupId = d.groupId
            AND g.startDate <= d.endDate
            AND g.endDate >= d.startDate
        WHERE d.versionId < (
                SELECT MIN(versionId)
                FROM #Results r
                WHERE r.groupId = d.groupId
                    AND (r.startDate = g.endDate OR r.endDate = g.startDate)
            )
            AND NOT EXISTS (
                SELECT *
                FROM #Data dsup
                WHERE dsup.groupId = d.groupId
                    AND dsup.versionId > d.versionId
                    AND dsup.startDate <= d.startDate
                    AND dsup.endDate >= d.endDate
            )
        GROUP BY
             d.groupId
            ,g.gapId
    ) mg
        ON mg.groupId = g.groupId
        AND mg.gapId = g.gapId
        AND mg.versionId = d.versionId
END

SELECT *
FROM #Results
WHERE versionId <> @placeholderId
order by groupId, startDate
Run Code Online (Sandbox Code Playgroud)

基于集合的解决方案会更有用,但我很难找到一个.有任何想法吗?

Ric*_*iwi 4

-- create a dates table
create table dates (thedate date primary key clustered);
;with dates(thedate) as (
  select dateadd(yy,years.number,0)+days.number
    from master..spt_values years
    join master..spt_values days
      on days.type='p' and days.number < datepart(dy,dateadd(yy,years.number+1,0)-1)
   where years.type='p' and years.number between 100 and 150
      -- note: 100-150 creates dates in the year range 2000-2050
      --       adjust as required
)
insert dbo.dates select * from dates;

-- for each date, determine the prevailing version
  select t.groupId, d.thedate, max(t.versionId) versionId
    into #tmp1
    from dates d
    join #Data t on t.startDate <= d.thedate and d.thedate <= t.endDate
group by t.groupId, d.thedate;

-- create index to help
create clustered index cix_tmp1 on #tmp1(groupId, thedate, versionId);

-- find the start dates
;with t as (
   select a.*, rn=row_number() over (partition by a.groupId order by a.thedate)
     from #tmp1 a
left join #tmp1 b on b.thedate = dateadd(d,-1,a.thedate) and a.groupId = b.groupId and a.versionId = b.versionId
    where b.versionId is null
)
   select c.groupId, c.thedate startdate, dateadd(d,-1,d.thedate) enddate, c.versionId
     from t c
left join t d on d.rn=c.rn+1 and c.groupId = d.groupId
 order by groupId, startdate;
Run Code Online (Sandbox Code Playgroud)

当然,您可以在“一个查询”中完成所有操作,但这样做会带来风险,因为性能会大幅下降。

请勿使用 - 仅出于学术兴趣 -

;with dates(thedate) as (
  select dateadd(yy,years.number,0)+days.number
    from master..spt_values years
    join master..spt_values days
      on days.type='p' and days.number < datepart(dy,dateadd(yy,years.number+1,0)-1)
   where years.type='p' and years.number between 100 and 150
      -- note: 100-150 creates dates in the year range 2000-2050
      --       adjust as required
), tmp1 as (
  select t.groupId, d.thedate, max(t.versionId) versionId
    from dates d
    join #Data t on t.startDate <= d.thedate and d.thedate <= t.endDate
group by t.groupId, d.thedate
), t as (
   select a.*, rn=row_number() over (partition by a.groupId order by a.thedate)
     from tmp1 a
left join tmp1 b on b.thedate = dateadd(d,-1,a.thedate) and a.groupId = b.groupId and a.versionId = b.versionId
    where b.versionId is null
)
   select c.groupId, c.thedate startdate, dateadd(d,-1,d.thedate) enddate, c.versionId
     from t c
left join t d on d.rn=c.rn+1 and c.groupId = d.groupId
 order by groupId, startdate;
Run Code Online (Sandbox Code Playgroud)