Exc*_*dez 5 sql sql-server mathematical-optimization intervals
我正在使用一组日期间隔,其中每个间隔都有一个版本号,新的间隔经常与旧的间隔重叠,甚至是它们的子集.根据这些数据,我需要计算一组新的时间间隔,以显示每个时间点的最新版本号.是否有针对此问题的基于集合的解决方案?
这是一个例子:
Interval 1: 11111111111111111111111
Interval 2: 2222222222
Interval 3: 33333333333333
Interval 4: 444444444
Interval 5: 555555555
Result : 11333333333333331155555555544
Run Code Online (Sandbox Code Playgroud)
以下是我正在使用的数据示例:
groupId startDate endDate version
-------- --------- ---------- ------
1 1/1/2010 1/1/2011 1
1 10/1/2010 7/5/2011 2
1 7/5/2011 8/13/2012 3
1 8/13/2012 12/31/2012 6
1 10/1/2012 11/1/2012 8
Run Code Online (Sandbox Code Playgroud)
......和期望的输出:
groupId startDate endDate version
-------- --------- ---------- ------
1 1/1/2010 10/1/2010 1
1 10/1/2010 7/5/2011 2
1 7/5/2011 8/13/2012 3
1 8/13/2011 10/1/2012 6
1 10/1/2012 11/1/2012 8 << note how version 8 supersedes version 6
1 11/1/2012 12/31/2012 6 << version 6 is split into two records
Run Code Online (Sandbox Code Playgroud)
我还没有找到任何其他这个问题的例子,我的谷歌搜索只会查找识别间隙和岛屿或覆盖集合的查询.
我想我有一个迭代解决方案(SQL Server 2008).它以结果集中的间隔的临时表开始,并通过插入具有特殊版本号的记录来定义我们要覆盖的范围的起点和终点.然后,它重复识别结果集间隔之间的间隙,并尝试使用原始数据集中的最新记录填充它们,直到没有更多间隙或不再添加记录:
GO
-- Create data set and results table
CREATE TABLE #Data (
groupId INT
,startDate DATE
,endDate DATE
,versionId INT
)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2007-12-22', '2008-12-22', 8)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2008-12-22', '2009-12-22', 9)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2009-12-22', '2010-12-22', 10)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2010-12-22', '2011-12-22', 11)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2011-01-01', '2011-11-30', 500)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2011-12-22', '2012-12-22', 12)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2012-01-22', '2012-12-22', 13)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2012-01-22', '2012-12-22', 14)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2012-04-22', '2012-12-22', 17)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (1, '2012-04-22', '2012-12-22', 19)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (2, '2010-01-01', '2011-01-01', 1)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (2, '2010-10-01', '2011-07-05', 2)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (2, '2011-07-05', '2012-08-13', 3)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (2, '2012-08-13', '2012-12-31', 6)
INSERT INTO #Data (groupId, startDate, endDate, versionId) VALUES (2, '2012-10-01', '2012-11-01', 8)
CREATE TABLE #Results (
groupId VARCHAR(10)
,startDate DATE
,endDate DATE
,versionId BIGINT
)
DECLARE @startDate DATE
DECLARE @endDate DATE
DECLARE @placeholderId BIGINT
SET @startDate = '20030101'
SET @endDate = '20121231'
SET @placeholderId = 999999999999999
INSERT #Results
SELECT DISTINCT
groupId
,CASE WHEN MIN(startDate) < @startDate THEN MIN(startDate) ELSE @startDate END
,CASE WHEN MIN(startDate) < @startDate THEN @startDate ELSE MIN(startDate) END
,@placeholderId
FROM #data
GROUP BY groupId
UNION ALL
SELECT DISTINCT
groupId
,CASE WHEN MAX(endDate) < @endDate THEN MAX(endDate) ELSE @endDate END
,CASE WHEN MAX(endDate) < @endDate THEN @endDate ELSE MAX(endDate) END
,@placeholderId
FROM #data
GROUP BY groupId
GO
-- Fill gaps in results table
DECLARE @startDate DATE
DECLARE @endDate DATE
DECLARE @placeholderId BIGINT
SET @startDate = '20030101'
SET @endDate = '20111231'
SET @placeholderId = 999999999999999
DECLARE @counter INT
SET @counter = 0
WHILE @counter < 10
BEGIN
SET @counter = @counter + 1;
WITH Gaps AS (
SELECT
gs.groupId
,gs.startDate
,MIN(ge.endDate) as endDate
,ROW_NUMBER() OVER (ORDER BY gs.groupId, gs.startDate) as gapId
FROM (
SELECT groupId, endDate as startDate
FROM #Results r1
WHERE NOT EXISTS (
SELECT *
FROM #Results r2
WHERE r2.groupId = r1.groupId
AND r2.versionId <> r1.versionId
AND r2.startDate <= r1.endDate
AND r2.endDate > r1.endDate
)
AND NOT (endDate >= @endDate AND versionId = @placeholderId)
) gs
INNER JOIN (
SELECT groupId, startDate as endDate
FROM #Results r1
WHERE NOT EXISTS (
SELECT *
FROM #Results r2
WHERE r2.groupId = r1.groupId
AND r2.versionId <> r1.versionId
AND r2.endDate >= r1.startDate
AND r2.startDate < r1.startDate
)
AND NOT (startDate <= @startDate AND versionId = @placeholderId)
) ge
ON ge.groupId = gs.groupId
AND ge.endDate >= gs.startDate
GROUP BY gs.groupId, gs.startDate
)
INSERT #Results (
groupId
,startDate
,endDate
,versionId
)
SELECT
d.groupId
,CASE WHEN d.startDate < g.startDate THEN g.startDate ELSE d.startDate END
,CASE WHEN d.endDate > g.endDate THEN g.endDate ELSE d.endDate END
,d.versionId
FROM #Data d
INNER JOIN Gaps g
ON g.groupId = d.groupId
AND g.startDate <= d.endDate
AND g.endDate >= d.startDate
INNER JOIN (
SELECT
d.groupId
,gapId
,MAX(d.versionId) as versionId
FROM #Data d
INNER JOIN Gaps g
ON g.groupId = d.groupId
AND g.startDate <= d.endDate
AND g.endDate >= d.startDate
WHERE d.versionId < (
SELECT MIN(versionId)
FROM #Results r
WHERE r.groupId = d.groupId
AND (r.startDate = g.endDate OR r.endDate = g.startDate)
)
AND NOT EXISTS (
SELECT *
FROM #Data dsup
WHERE dsup.groupId = d.groupId
AND dsup.versionId > d.versionId
AND dsup.startDate <= d.startDate
AND dsup.endDate >= d.endDate
)
GROUP BY
d.groupId
,g.gapId
) mg
ON mg.groupId = g.groupId
AND mg.gapId = g.gapId
AND mg.versionId = d.versionId
END
SELECT *
FROM #Results
WHERE versionId <> @placeholderId
order by groupId, startDate
Run Code Online (Sandbox Code Playgroud)
基于集合的解决方案会更有用,但我很难找到一个.有任何想法吗?
-- create a dates table
create table dates (thedate date primary key clustered);
;with dates(thedate) as (
select dateadd(yy,years.number,0)+days.number
from master..spt_values years
join master..spt_values days
on days.type='p' and days.number < datepart(dy,dateadd(yy,years.number+1,0)-1)
where years.type='p' and years.number between 100 and 150
-- note: 100-150 creates dates in the year range 2000-2050
-- adjust as required
)
insert dbo.dates select * from dates;
-- for each date, determine the prevailing version
select t.groupId, d.thedate, max(t.versionId) versionId
into #tmp1
from dates d
join #Data t on t.startDate <= d.thedate and d.thedate <= t.endDate
group by t.groupId, d.thedate;
-- create index to help
create clustered index cix_tmp1 on #tmp1(groupId, thedate, versionId);
-- find the start dates
;with t as (
select a.*, rn=row_number() over (partition by a.groupId order by a.thedate)
from #tmp1 a
left join #tmp1 b on b.thedate = dateadd(d,-1,a.thedate) and a.groupId = b.groupId and a.versionId = b.versionId
where b.versionId is null
)
select c.groupId, c.thedate startdate, dateadd(d,-1,d.thedate) enddate, c.versionId
from t c
left join t d on d.rn=c.rn+1 and c.groupId = d.groupId
order by groupId, startdate;
Run Code Online (Sandbox Code Playgroud)
当然,您可以在“一个查询”中完成所有操作,但这样做会带来风险,因为性能会大幅下降。
请勿使用 - 仅出于学术兴趣 -
;with dates(thedate) as (
select dateadd(yy,years.number,0)+days.number
from master..spt_values years
join master..spt_values days
on days.type='p' and days.number < datepart(dy,dateadd(yy,years.number+1,0)-1)
where years.type='p' and years.number between 100 and 150
-- note: 100-150 creates dates in the year range 2000-2050
-- adjust as required
), tmp1 as (
select t.groupId, d.thedate, max(t.versionId) versionId
from dates d
join #Data t on t.startDate <= d.thedate and d.thedate <= t.endDate
group by t.groupId, d.thedate
), t as (
select a.*, rn=row_number() over (partition by a.groupId order by a.thedate)
from tmp1 a
left join tmp1 b on b.thedate = dateadd(d,-1,a.thedate) and a.groupId = b.groupId and a.versionId = b.versionId
where b.versionId is null
)
select c.groupId, c.thedate startdate, dateadd(d,-1,d.thedate) enddate, c.versionId
from t c
left join t d on d.rn=c.rn+1 and c.groupId = d.groupId
order by groupId, startdate;
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
508 次 |
| 最近记录: |