按天统计每种状态的ID数量

Rog*_*erg 5 sql sql-server

情况:

我有三张表。表1包含ID和订阅日期。表2包含ID、当前活动状态以及最近一次状态更改的日期。表3包含ID以及所有状态更改的日志。 注意: 在订阅日期当天,所有ID均处于活动(active)状态。当同一天内有多次状态更改时,应以最后一次更改的状态为准。

目的:

我需要计算每天每种状态的ID数,即每天有多少ID处于活动(active)、不活动(inactive)和有风险(risky)状态。我的问题是:即使某个ID在特定日期没有任何数据,也要保证它的状态每天都被计入。例如:ID 1(请参阅下面的示例代码)自5月2日(加入日期)以来一直处于活动状态,并且没有任何状态更改,因此直到现在,它每天都应该被计为活动状态。

在别处咨询了这个问题后,有人建议创建一个函数,通过 CROSS APPLY 调用它,并将计数结果存储在一张表中。我没有这方面的技能,但这样做是否可以解决这个问题?

所需输出:

+------------+----------+-------+
|    date    |  status  | count |
+------------+----------+-------+
| 1-May-2019 | active   |     0 |
| 1-May-2019 | inactive |     0 |
| 1-May-2019 | risky    |     1 |
| 2-May-2019 | active   |     1 |
| 2-May-2019 | inactive |     0 |
| 2-May-2019 | risky    |     1 |
| 3-May-2019 | active   |     1 |
| 3-May-2019 | inactive |     0 |
| 3-May-2019 | risky    |     1 |
| 4-May-2019 | active   |     1 |
| 4-May-2019 | inactive |     0 |
| 4-May-2019 | risky    |     1 |
| 5-May-2019 | active   |     3 |
| 5-May-2019 | inactive |     0 |
| 5-May-2019 | risky    |     1 |
| ...        | ...      |   ... |
+------------+----------+-------+
Run Code Online (Sandbox Code Playgroud)

示例代码(Fiddle):

-- Calendar helper table: one row per day from 2019-05-01 through yesterday.
-- (The asker was not sure whether this table would turn out to be usable.)
CREATE TABLE #dates ([date] DATE);

DECLARE @cursor_day DATE = '2019-05-01';
DECLARE @last_day DATE = DATEADD(DAY, -1, GETDATE());

-- Append one day per iteration until yesterday has been inserted.
WHILE @cursor_day <= @last_day
BEGIN
    INSERT INTO #dates ([date])
    VALUES (@cursor_day);

    SET @cursor_day = DATEADD(DAY, 1, @cursor_day);
END;
GO

-- Subscriptions: one row per ID with its subscription date.
-- Every ID starts out 'active' on its subscription date.
CREATE TABLE #t1
(
    id INT,
    [subdate] DATE
);

INSERT INTO #t1 (id, [subdate])
VALUES
    (9, '2019-01-01'),
    (1, '2019-05-02'),
    (2, '2019-05-05'),
    (3, '2019-05-05'),
    (4, '2019-05-10');
GO

-- Current state: one row per ID with its latest status and the date of that latest change.
CREATE TABLE #t2
(
    id INT,
    [status] VARCHAR(MAX),
    [datestatus] DATE
);

INSERT INTO #t2 (id, [status], [datestatus])
VALUES
    (9, 'risky',    '2019-03-01'),
    (1, 'active',   '2019-05-02'),
    (2, 'inactive', '2019-05-13'),
    (3, 'active',   '2019-05-14'),
    (4, 'risky',    '2019-05-15');
GO

-- Status change log: one row per logged status change of an ID.
-- An ID can have more than one change on the same day (see id 4 on 2019-05-15).
CREATE TABLE #t3
(
    id INT,
    [statuschange] VARCHAR(MAX),
    [datechange] DATE
);

INSERT INTO #t3 (id, [statuschange], [datechange])
VALUES
    (9, 'inactive', '2019-01-01'),
    (9, 'active',   '2019-02-01'),
    (9, 'risky',    '2019-03-01'),
    (2, 'risky',    '2019-05-08'),
    (2, 'inactive', '2019-05-13'),
    (3, 'inactive', '2019-05-08'),
    (3, 'active',   '2019-05-14'),
    (4, 'inactive', '2019-05-15'),
    (4, 'risky',    '2019-05-15');
GO
Run Code Online (Sandbox Code Playgroud)

我现在拥有的是:

-- Asker's work in progress: turns each logged status change into a
-- (StartDate, EndDate, PreviousStatusChange, NewStatus) row per ID.
;WITH change_window AS (
    SELECT
        sub.id,
        -- Start of the row's period: the previous logged change date, or the
        -- subscription date when there is no earlier log row for this ID.
        COALESCE(
            LAG(chg.datechange) OVER (PARTITION BY sub.id ORDER BY chg.datechange),
            sub.subdate
        ) AS StartDate,
        chg.datechange,
        -- Status before this change; defaults to 'active' for the first row of an ID.
        COALESCE(
            LAG(chg.statuschange) OVER (PARTITION BY sub.id ORDER BY chg.datechange),
            'active'
        ) AS PreviousStatusChange,
        chg.statuschange
    FROM #t1 AS sub
    -- No #t2 column is selected; this join only keeps IDs that also exist in #t2.
    INNER JOIN #t2 AS cur
        ON sub.id = cur.id
    -- LEFT JOIN keeps IDs without any logged change (their log columns are NULL).
    LEFT JOIN #t3 AS chg
        ON sub.id = chg.id
)
SELECT
    w.id,
    w.StartDate,
    -- IDs with no logged change get an open-ended placeholder end date.
    COALESCE(w.datechange, '2099-01-01') AS EndDate,
    w.PreviousStatusChange,
    COALESCE(w.statuschange, w.PreviousStatusChange) AS NewStatus
FROM change_window AS w
Run Code Online (Sandbox Code Playgroud)

Chr*_*ert 2

使用日期表是正确的方法。要得到所需的输出,您需要这些种子数据。我把您的日期表的起始日期提前了,以便较早订阅的ID也能被填充进来。

我还添加了一个状态表,因为您的输出要求需要为每个状态的每个日期一行。

-- Calendar table: one row per day from 2019-01-01 through yesterday.
-- The range starts at 2019-01-01 so that it also covers the earliest
-- subscription date in the sample #t1 data.
DROP TABLE IF EXISTS #dates;
CREATE TABLE #dates ([date] date);

-- ISO yyyy-mm-dd literal: converts to DATE the same way under any
-- SET DATEFORMAT / SET LANGUAGE setting (unlike '01/01/2019').
DECLARE @dStart DATE = '2019-01-01';
DECLARE @dEnd DATE = DATEADD(day, -1, GETDATE());

-- Single set-based INSERT instead of one INSERT per day in a WHILE loop.
WITH calendar AS
(
    -- Anchor: first day; yields no rows at all when the range is empty.
    SELECT @dStart AS [date]
    WHERE @dStart <= @dEnd
    UNION ALL
    -- Recursive step: next day, until @dEnd has been produced.
    SELECT DATEADD(day, 1, [date])
    FROM calendar
    WHERE [date] < @dEnd
)
INSERT INTO #dates ([date])
SELECT [date]
FROM calendar
-- The default recursion limit is 100 levels; the range is longer than that.
OPTION (MAXRECURSION 0);
GO

-- Lookup of every status value, so the report can emit a row per status
-- per date even when no ID currently has that status.
DROP TABLE IF EXISTS #status;

CREATE TABLE #status
(
    status VARCHAR(20)
);

INSERT INTO #status (status)
VALUES
    ('active'),
    ('inactive'),
    ('risky');
GO

-- Subscriptions: one row per ID with its subscription date.
DROP TABLE IF EXISTS #t1;

CREATE TABLE #t1
(
    id INT,
    [subdate] DATE
);

INSERT INTO #t1 (id, [subdate])
VALUES
    (9, '2019-01-01'),
    (1, '2019-05-02'),
    (2, '2019-05-05'),
    (3, '2019-05-05'),
    (4, '2019-05-10');
GO

-- Current state: one row per ID with its latest status and the date of that latest change.
DROP TABLE IF EXISTS #t2;

CREATE TABLE #t2
(
    id INT,
    [status] VARCHAR(MAX),
    [datestatus] DATE
);

INSERT INTO #t2 (id, [status], [datestatus])
VALUES
    (9, 'risky',    '2019-03-01'),
    (1, 'active',   '2019-05-02'),
    (2, 'inactive', '2019-05-13'),
    (3, 'active',   '2019-05-14'),
    (4, 'risky',    '2019-05-15');
GO

-- Status change log: one row per logged status change of an ID.
-- An ID can have more than one change on the same day (see id 4 on 2019-05-15).
DROP TABLE IF EXISTS #t3;

CREATE TABLE #t3
(
    id INT,
    [statuschange] VARCHAR(MAX),
    [datechange] DATE
);

INSERT INTO #t3 (id, [statuschange], [datechange])
VALUES
    (9, 'inactive', '2019-01-01'),
    (9, 'active',   '2019-02-01'),
    (9, 'risky',    '2019-03-01'),
    (2, 'risky',    '2019-05-08'),
    (2, 'inactive', '2019-05-13'),
    (3, 'inactive', '2019-05-08'),
    (3, 'active',   '2019-05-14'),
    (4, 'inactive', '2019-05-15'),
    (4, 'risky',    '2019-05-15');
GO

-- Daily count of IDs per status.
-- Output: one row per (date, status) for every #dates day in [@From, @Thru]
-- and every #status value; count is 0 when no ID holds that status that day.
-- An ID keeps its status on every following day until its next status event.
DECLARE
    @From DATE
    , @Thru DATE;

-- ISO yyyy-mm-dd literals convert to DATE identically under any
-- SET DATEFORMAT / SET LANGUAGE. '05/19/2019' fails under dmy, and
-- '05/01/2019' would silently become 5 January there.
SET @From = '2019-05-01';
SET @Thru = '2019-05-19';

WITH
output_foundation AS
(
    -- Every reportable day paired with every status, so that combinations
    -- without any ID still show up with a zero count.
    SELECT D.date, S.status
    FROM #dates AS D CROSS JOIN #status AS S
    WHERE D.date >= @From AND D.date <= @Thru
)
, id_event AS
(
    -- All known status events, tagged with the priority of their source.
    -- 1 = implicit 'active' on the subscription date,
    -- 2 = logged change,
    -- 3 = #t2, the latest status of the ID as of its latest change date.
    SELECT id, subdate AS datechange, 'active' AS statuschange, 1 AS source_rank FROM #t1
    UNION ALL
    SELECT id, datechange, statuschange, 2 FROM #t3
    UNION ALL
    SELECT id, datestatus, status, 3 FROM #t2
)
, id_stat AS
(
    -- Rank the events of each (id, day) so exactly one can be kept.
    -- Without this, an ID with several events on one day (id 4 on 2019-05-15)
    -- is counted once per status, and which status carries forward is arbitrary.
    -- NOTE(review): two *logged* changes on the same day cannot be ordered
    -- because datechange has no time part; statuschange is only a deterministic
    -- tiebreaker here. Use a real timestamp if one is available.
    SELECT
        id
        , datechange
        , statuschange
        , ROW_NUMBER() OVER
            (
                PARTITION BY id, datechange
                ORDER BY source_rank DESC, statuschange
            ) AS pick
    FROM id_event
    WHERE datechange IS NOT NULL AND statuschange IS NOT NULL
)
, id_period AS
(
    -- Turn the surviving events into validity periods:
    -- valid_from inclusive, valid_until exclusive (NULL = still current).
    SELECT
        id
        , statuschange
        , datechange AS valid_from
        , LEAD(datechange) OVER (PARTITION BY id ORDER BY datechange) AS valid_until
    FROM id_stat
    WHERE pick = 1
)

SELECT
    OFDN.date
    , OFDN.status
    -- COUNT over the outer-joined column: unmatched (date, status) rows give 0.
    , COUNT(IDP.statuschange) AS count
FROM
    output_foundation AS OFDN
    LEFT OUTER JOIN id_period AS IDP
        ON IDP.statuschange = OFDN.status
            AND IDP.valid_from <= OFDN.date
            AND (IDP.valid_until IS NULL OR OFDN.date < IDP.valid_until)
GROUP BY
    OFDN.date
    , OFDN.status
ORDER BY
    OFDN.date
    , OFDN.status;
Run Code Online (Sandbox Code Playgroud)