SQL*_*tor 5 t-sql window-functions group-by
我在 Martin 的回答下方添加了一个不使用窗口函数的解决方案,以及一个使用大数据集的基准测试。
这是帖子“GROUP BY 使用不在 SELECT 列表中的列——这什么时候实用、优雅或强大?”的后续讨论。
在我对这一挑战的解决方案中,我使用了一个查询,该查询按不属于选择列表的表达式进行分组。当逻辑分组元素涉及来自其他行的数据时,这经常与窗口函数一起使用。
也许这是一个矫枉过正的例子,但我认为你可能会发现挑战本身很有趣。我会等待发布我的解决方案,也许你们中的一些人可以提出更好的解决方案。
我们有一个传感器表,用于定期记录读数值。无法保证各次采样之间的时间间隔是均匀的。
您需要编写一个查询来报告“异常”,即传感器报告的读数超出阈值(无论偏低还是偏高)的各次情况。传感器读数持续高于或低于阈值的每个时间段都被视为一次“异常”。一旦读数恢复正常,该异常即结束。
该脚本采用 T-SQL 格式,是我的培训材料的一部分。
----------------------------------------------
-- Sensor Thresholds - 1 - Setup Example
-- Creates the [Sensors] and [Measurements] tables and loads the sample rows.
----------------------------------------------

-- One row per sensor, holding its lower and upper reading thresholds.
CREATE TABLE [Sensors]
(
    [Sensor]          NVARCHAR(10)  NOT NULL,
    [Lower Threshold] DECIMAL(7, 2) NOT NULL,
    [Upper Threshold] DECIMAL(7, 2) NOT NULL,
    CONSTRAINT [PK Sensors] PRIMARY KEY CLUSTERED ([Sensor]),
    -- The threshold range must be non-empty.
    CONSTRAINT [CK Value Range] CHECK ([Upper Threshold] > [Lower Threshold])
);
GO

INSERT INTO [Sensors] ([Sensor], [Lower Threshold], [Upper Threshold])
VALUES
    (N'Sensor A', -50,  50),
    (N'Sensor B',  40,  80),
    (N'Sensor C',   0, 100);
GO

-- One row per reading; a sensor has at most one reading per point in time.
CREATE TABLE [Measurements]
(
    [Sensor]       NVARCHAR(10)  NOT NULL,
    [Measure Time] DATETIME2(0)  NOT NULL,
    [Measurement]  DECIMAL(7, 2) NOT NULL,
    CONSTRAINT [PK Measurements] PRIMARY KEY CLUSTERED ([Sensor], [Measure Time]),
    CONSTRAINT [FK Measurements Sensors] FOREIGN KEY ([Sensor]) REFERENCES [Sensors] ([Sensor])
);
GO

INSERT INTO [Measurements] ([Sensor], [Measure Time], [Measurement])
VALUES
    (N'Sensor A', N'20160101 08:00', -9),
    (N'Sensor A', N'20160101 09:00', 30),
    (N'Sensor A', N'20160101 10:30', 59),
    (N'Sensor A', N'20160101 23:00', 66),
    (N'Sensor A', N'20160102 08:00', 48),
    (N'Sensor A', N'20160102 11:30',  8),
    -- Note that this exception range has both over and under....
    (N'Sensor B', N'20160101 08:00', 39),
    (N'Sensor B', N'20160101 10:30', 88),
    (N'Sensor B', N'20160101 13:00', 75),
    (N'Sensor B', N'20160102 08:00', 95),
    (N'Sensor B', N'20160102 17:00', 75),
    (N'Sensor C', N'20160101 09:00',  1),
    (N'Sensor C', N'20160101 10:00', -1),
    (N'Sensor C', N'20160101 18:00', -2),
    (N'Sensor C', N'20160101 22:00', -2),
    (N'Sensor C', N'20160101 23:30', -1);
GO
Run Code Online (Sandbox Code Playgroud)
/*
Sensor Exception Start Time Exception End Time Exception Duration (minutes) Min Measurement Max Measurement Lower Threshold Upper Threshold Maximal Delta From Thresholds
------ -------------------- ------------------ ---------------------------- --------------- --------------- --------------- --------------- -----------------------------
Sensor A 2016-01-01 10:30:00 2016-01-02 08:00:00 1290 59.00 66.00 -50.00 50.00 16.00
Sensor B 2016-01-01 08:00:00 2016-01-01 13:00:00 300 39.00 88.00 40.00 80.00 8.00
Sensor B 2016-01-02 08:00:00 2016-01-02 17:00:00 540 95.00 95.00 40.00 80.00 15.00
Sensor C 2016-01-01 10:00:00 2016-01-01 23:30:00 810 -2.00 -1.00 0.00 100.00 -2.00
*/
Run Code Online (Sandbox Code Playgroud)
我可能会使用类似下面的东西。
它能够利用索引顺序并避免排序,直到到达最终的 GROUP BY(在我的环境中,这一步使用的是流聚合)。
原则上,其实并不需要最后这个分组操作。应该可以读取按 Sensor, MeasureTime 排序的输入流,
并以流方式输出所需的结果,但我认为您需要为此编写 SQLCLR 过程。
-- Reports every run of consecutive out-of-threshold readings per sensor as one exception.
WITH classified AS (
    -- Every measurement joined to its sensor's thresholds, flagged as inside/outside the
    -- range, with a marker on the first out-of-range row that follows an in-range row
    -- (or that is the sensor's very first row: LAG defaults to 1 = "within threshold").
    SELECT
        m.[Sensor],
        m.[Measure Time],
        m.[Measurement],
        s.[Lower Threshold],
        s.[Upper Threshold],
        flag.within_threshold,
        CASE
            WHEN flag.within_threshold = 0
                 AND LAG(flag.within_threshold, 1, 1)
                         OVER (PARTITION BY m.[Sensor] ORDER BY m.[Measure Time]) = 1
                THEN 1
            ELSE 0
        END AS start_group_flag,
        LEAD(m.[Measure Time])
            OVER (PARTITION BY m.[Sensor] ORDER BY m.[Measure Time]) AS next_measure_time,
        -- Positive distance above the upper threshold, 0 when not above it.
        CASE
            WHEN m.[Measurement] > s.[Upper Threshold] THEN m.[Measurement] - s.[Upper Threshold]
            ELSE 0
        END AS overage,
        -- Negative distance below the lower threshold, 0 when not below it.
        CASE
            WHEN m.[Measurement] < s.[Lower Threshold] THEN m.[Measurement] - s.[Lower Threshold]
            ELSE 0
        END AS underage
    FROM [Measurements] AS m
    INNER JOIN [Sensors] AS s
        ON m.[Sensor] = s.[Sensor]
    CROSS APPLY (
        SELECT
            CASE
                WHEN m.[Measurement] BETWEEN s.[Lower Threshold] AND s.[Upper Threshold] THEN 1
                ELSE 0
            END
    ) AS flag (within_threshold)
),
out_of_range_runs AS (
    -- Only out-of-range rows; the running total of start markers numbers each run per sensor.
    SELECT
        c.[Sensor],
        c.[Measure Time],
        c.[Measurement],
        c.[Lower Threshold],
        c.[Upper Threshold],
        c.next_measure_time,
        c.overage,
        c.underage,
        SUM(c.start_group_flag)
            OVER (PARTITION BY c.[Sensor] ORDER BY c.[Measure Time] ROWS UNBOUNDED PRECEDING) AS group_number
    FROM classified AS c
    WHERE c.within_threshold = 0
)
SELECT
    [Sensor],
    MIN([Measure Time]) AS [Exception Start Time],
    -- A run ends at the next reading of the same sensor, or at its own last reading
    -- when the sensor has no later reading.
    MAX(ISNULL(next_measure_time, [Measure Time])) AS [Exception End Time],
    DATEDIFF(
        MINUTE,
        MIN([Measure Time]),
        MAX(ISNULL(next_measure_time, [Measure Time]))
    ) AS [Exception Duration (minutes)],
    MIN([Measurement]) AS [Min Measurement],
    MAX([Measurement]) AS [Max Measurement],
    [Lower Threshold],
    [Upper Threshold],
    -- Whichever excursion is larger in magnitude; an undershoot keeps its negative sign.
    CASE
        WHEN MAX(overage) > -MIN(underage) THEN MAX(overage)
        ELSE MIN(underage)
    END AS [Maximal Delta From Thresholds]
FROM out_of_range_runs
GROUP BY
    group_number,
    [Sensor],
    [Lower Threshold],
    [Upper Threshold];
Run Code Online (Sandbox Code Playgroud)
按Sensor, [Measure Time]
顺序读取行的流式 SQL CLR 函数实现:
using Microsoft.SqlServer.Server;
using System;
using System.Collections;
using System.Data;
using System.Data.SqlClient;
using System.Data.SqlTypes;
/// <summary>
/// SQLCLR streaming table-valued function that reports sensor threshold "exceptions":
/// runs of consecutive readings of one sensor that fall outside that sensor's
/// [Lower Threshold]..[Upper Threshold] range.
/// </summary>
public partial class UserDefinedFunctions
{
    /// <summary>
    /// Reads all measurements ordered by sensor and measure time over a regular (non-context)
    /// connection and yields one <see cref="SensorException"/> per out-of-threshold run.
    /// </summary>
    /// <param name="Instance">SQL Server instance to connect to (used as the data source).</param>
    /// <param name="Database">Database that holds dbo.Sensors and dbo.Measurements.</param>
    /// <returns>An enumerator of <see cref="SensorException"/> objects, consumed by
    /// <see cref="GetExceptions_FillRow"/>.</returns>
    [SqlFunction(
        DataAccess = DataAccessKind.Read,
        FillRowMethodName = "GetExceptions_FillRow",
        IsDeterministic = true,
        IsPrecise = true,
        Name = "GetExceptions",
        SystemDataAccess = SystemDataAccessKind.None,
        TableDefinition =
@"
Sensor nvarchar(10) NULL,
Exception_Start_Time datetime2(0) NULL,
Exception_End_Time datetime2(0) NULL,
Exception_Duration_Minutes integer NULL,
Min_Measurement decimal (7,2) NULL,
Max_Measurement decimal (7,2) NULL,
Lower_Threshold decimal (7,2) NULL,
Upper_Threshold decimal (7,2) NULL,
Maximal_Delta_From_Thresholds decimal (7,2) NULL
")]
    public static IEnumerator GetExceptions
    (
        [SqlFacet(MaxSize = 256)] SqlString Instance,
        [SqlFacet(MaxSize = 128)] SqlString Database
    )
    {
        const string query =
@"
SELECT
S.Sensor,
S.[Lower Threshold],
S.[Upper Threshold],
M.[Measure Time],
M.Measurement
FROM dbo.Sensors AS S
JOIN dbo.Measurements AS M
ON M.Sensor = S.Sensor
ORDER BY
S.Sensor ASC,
M.[Measure Time] ASC;
";

        var csb = new SqlConnectionStringBuilder
        {
            ApplicationName = "Thresholds.GetExceptions",
            ContextConnection = false,
            DataSource = Instance.Value,
            Enlist = false,
            InitialCatalog = Database.Value,
            IntegratedSecurity = true
        };

        using (var con = new SqlConnection(csb.ConnectionString))
        {
            con.Open();

            using (var cmd = new SqlCommand(query, con))
            // Dispose the reader together with the command once enumeration finishes.
            using (var reader = cmd.ExecuteReader(CommandBehavior.SingleResult | CommandBehavior.SequentialAccess))
            {
                Record previous = null;                 // last row read, of any sensor
                SensorException sensorException = null; // exception currently open, if any

                while (reader.Read())
                {
                    // SequentialAccess requires the columns to be read in ordinal order.
                    var record = new Record
                    {
                        Sensor = reader.GetSqlString(0),
                        LowerThreshold = reader.GetSqlDecimal(1),
                        UpperThreshold = reader.GetSqlDecimal(2),
                        MeasureTime = reader.GetDateTime(3),
                        Measurement = reader.GetSqlDecimal(4)
                    };

                    // Rows arrive grouped by sensor. An exception that is still open when the
                    // sensor changes belongs to the previous sensor: close it at that sensor's
                    // last reading so it neither absorbs this sensor's rows nor borrows this
                    // sensor's timestamp as its end time.
                    if (sensorException != null && !IsSameSensor(sensorException.Sensor, record.Sensor))
                    {
                        sensorException.Exception_End_Time = previous.MeasureTime;
                        yield return sensorException;
                        sensorException = null;
                    }

                    if (record.Measurement < record.LowerThreshold || record.Measurement > record.UpperThreshold)
                    {
                        if (sensorException == null)
                        {
                            // First out-of-threshold reading of a new run.
                            sensorException = new SensorException
                            {
                                Sensor = record.Sensor,
                                Exception_Start_Time = record.MeasureTime,
                                Min_Measurement = record.Measurement,
                                Max_Measurement = record.Measurement,
                                Lower_Threshold = record.LowerThreshold,
                                Upper_Threshold = record.UpperThreshold
                            };
                        }
                        else
                        {
                            // Run continues: widen the observed min/max.
                            if (record.Measurement < sensorException.Min_Measurement)
                            {
                                sensorException.Min_Measurement = record.Measurement;
                            }
                            if (record.Measurement > sensorException.Max_Measurement)
                            {
                                sensorException.Max_Measurement = record.Measurement;
                            }
                        }
                    }
                    else if (sensorException != null)
                    {
                        // Reading is back within thresholds: the run ends at this reading's time.
                        sensorException.Exception_End_Time = record.MeasureTime;
                        yield return sensorException;
                        sensorException = null;
                    }

                    previous = record;
                }

                // Final row: a run still open at the end of the data ends at the last reading.
                if (sensorException != null)
                {
                    sensorException.Exception_End_Time = previous.MeasureTime;
                    yield return sensorException;
                }
            }
        }
    }

    /// <summary>
    /// Returns true when both values name the same sensor (ordinal comparison; two NULLs match).
    /// </summary>
    private static bool IsSameSensor(SqlString left, SqlString right)
    {
        if (left.IsNull || right.IsNull)
        {
            return left.IsNull && right.IsNull;
        }

        return string.Equals(left.Value, right.Value, StringComparison.Ordinal);
    }

    /// <summary>
    /// Fill-row method: unpacks one <see cref="SensorException"/> produced by
    /// <see cref="GetExceptions"/> into the output columns and derives the duration and
    /// the maximal delta from the thresholds.
    /// </summary>
    /// <param name="obj">The <see cref="SensorException"/> yielded by the enumerator.</param>
    public static void GetExceptions_FillRow
    (
        Object obj,
        out SqlString Sensor,
        out DateTime Exception_Start_Time,
        out DateTime Exception_End_Time,
        out SqlInt32 Exception_Duration_Minutes,
        out SqlDecimal Min_Measurement,
        out SqlDecimal Max_Measurement,
        out SqlDecimal Lower_Threshold,
        out SqlDecimal Upper_Threshold,
        out SqlDecimal Maximal_Delta_From_Thresholds
    )
    {
        var sensorException = (SensorException)obj;

        Sensor = sensorException.Sensor;
        Exception_Start_Time = sensorException.Exception_Start_Time;
        Exception_End_Time = sensorException.Exception_End_Time;
        Exception_Duration_Minutes = Convert.ToInt32(Exception_End_Time.Subtract(Exception_Start_Time).TotalMinutes);
        Min_Measurement = sensorException.Min_Measurement;
        Max_Measurement = sensorException.Max_Measurement;
        Lower_Threshold = sensorException.Lower_Threshold;
        Upper_Threshold = sensorException.Upper_Threshold;

        // Both differences are expressed as non-negative magnitudes; the larger one is reported.
        var upperDiff = Max_Measurement > Upper_Threshold ? Max_Measurement - Upper_Threshold : 0;
        var lowerDiff = Min_Measurement < Lower_Threshold ? Lower_Threshold - Min_Measurement : 0;
        Maximal_Delta_From_Thresholds = upperDiff > lowerDiff ? upperDiff : lowerDiff;
    }

    /// <summary>One row of the source query: a measurement with its sensor's thresholds.</summary>
    internal class Record
    {
        internal SqlString Sensor { get; set; }
        internal SqlDecimal LowerThreshold { get; set; }
        internal SqlDecimal UpperThreshold { get; set; }
        internal DateTime MeasureTime { get; set; }
        internal SqlDecimal Measurement { get; set; }
    }

    /// <summary>One out-of-threshold run of a single sensor, accumulated while streaming.</summary>
    internal class SensorException
    {
        internal SqlString Sensor { get; set; }
        internal DateTime Exception_Start_Time { get; set; }
        internal DateTime Exception_End_Time { get; set; }
        internal SqlDecimal Min_Measurement { get; set; }
        internal SqlDecimal Max_Measurement { get; set; }
        internal SqlDecimal Lower_Threshold { get; set; }
        internal SqlDecimal Upper_Threshold { get; set; }
    }
}
Run Code Online (Sandbox Code Playgroud)
用于创建程序集的代码(有点太长,无法内联发布)
注意:由于限制,这个程序集需要 EXTERNAL_ACCESS
权限,尽管它只从同一个数据库中读取。出于测试目的,将数据库设置为 TRUSTWORTHY 就足够了,
尽管有充分的理由不在生产环境中这样做——应改为对程序集进行签名。
-- T-SQL wrapper for the CLR method Thresholds.UserDefinedFunctions.GetExceptions.
-- The two parameters are handed to the CLR method, which uses them to open its own
-- connection to the instance and database holding the sensor data.
CREATE OR ALTER FUNCTION dbo.GetExceptions
(
    @Instance NVARCHAR(256),
    @Database NVARCHAR(128)
)
RETURNS TABLE
(
    Sensor                          NVARCHAR(10)   NULL,
    [Exception Start Time]          DATETIME2(0)   NULL,
    [Exception End Time]            DATETIME2(0)   NULL,
    [Exception Duration (minutes)]  INTEGER        NULL,
    [Min Measurement]               DECIMAL (7,2)  NULL,
    [Max Measurement]               DECIMAL (7,2)  NULL,
    [Lower Threshold]               DECIMAL (7,2)  NULL,
    [Upper_Threshold]               DECIMAL (7,2)  NULL,
    [Maximal Delta From Thresholds] DECIMAL (7,2)  NULL
)
-- Declares the order in which the CLR function returns its rows.
ORDER (Sensor, [Exception Start Time])
AS EXTERNAL NAME Thresholds.UserDefinedFunctions.GetExceptions;
Run Code Online (Sandbox Code Playgroud)
-- Lists every exception returned by the CLR function for the current instance and database.
SELECT
    ex.Sensor,
    ex.[Exception Start Time],
    ex.[Exception End Time],
    ex.[Exception Duration (minutes)],
    ex.[Min Measurement],
    ex.[Max Measurement],
    ex.[Lower Threshold],
    ex.Upper_Threshold,
    ex.[Maximal Delta From Thresholds]
FROM dbo.GetExceptions(@@SERVERNAME, DB_NAME()) AS ex
ORDER BY
    ex.Sensor,
    ex.[Exception Start Time];
Run Code Online (Sandbox Code Playgroud)
需要参数以便函数知道如何连接到源数据。
| Sensor   | Exception Start Time | Exception End Time  | Exception Duration (minutes) | Min Measurement | Max Measurement | Lower Threshold | Upper_Threshold | Maximal Delta From Thresholds |
|----------|----------------------|---------------------|------------------------------|-----------------|-----------------|-----------------|-----------------|-------------------------------|
| Sensor A | 2016-01-01 10:30:00  | 2016-01-02 08:00:00 | 1290                         | 59.00           | 66.00           | -50.00          | 50.00           | 16.00                         |
| Sensor B | 2016-01-01 08:00:00  | 2016-01-01 13:00:00 | 300                          | 39.00           | 88.00           | 40.00           | 80.00           | 8.00                          |
| Sensor B | 2016-01-02 08:00:00  | 2016-01-02 17:00:00 | 540                          | 95.00           | 95.00           | 40.00           | 80.00           | 15.00                         |
| Sensor C | 2016-01-01 10:00:00  | 2016-01-01 23:30:00 | 810                          | -2.00           | -1.00           | 0.00            | 100.00          | 2.00                          |