您应该多久更新一次统计数据?

Hen*_*sen 3 sql-server-2008 sql-server statistics azure-sql-database sql-server-2017

您应该多久更新一次统计数据?什么是“太少”?“太频繁”有多频繁?

答案是“这取决于”您的数据库、用户、数据等。

所以我试图用两个表来记录我们的统计数据随着时间推移的变化情况。下面是这两个表:

-- Snapshot tables used to record how statistics evolve over time.
--   dbo.dm_db_stats_properties : one row per captured statistics header.
--   dbo.dm_db_stats_histogram  : the histogram steps belonging to a captured header.
--
-- The drops are guarded with OBJECT_ID so the script can be re-run (and run the
-- very first time) without raising "table does not exist"; this form also works on
-- versions that lack DROP TABLE IF EXISTS. The child table is dropped first
-- because of the foreign key to the parent.
IF OBJECT_ID(N'dbo.dm_db_stats_histogram', N'U') IS NOT NULL
  DROP TABLE dbo.dm_db_stats_histogram
IF OBJECT_ID(N'dbo.dm_db_stats_properties', N'U') IS NOT NULL
  DROP TABLE dbo.dm_db_stats_properties
go
CREATE TABLE dbo.dm_db_stats_properties(
  dm_db_stats_propertiesID INT IDENTITY(1,1) NOT NULL constraint PK_dm_db_stats_properties PRIMARY KEY CLUSTERED,
  DatabaseId INT NOT NULL,
  object_id int NOT NULL,
  stats_id  int NOT NULL,
  last_updated  DATETIME2 NOT NULL,
  rows  BIGINT NOT NULL,
  rows_sampled  BIGINT NOT NULL,
  steps int NOT NULL,
  unfiltered_rows   BIGINT NOT NULL,
  modification_counter  BIGINT NOT NULL,
  persisted_sample_percent  FLOAT  NULL,
  -- UTC time at which this snapshot row was captured.
  SampleDate DATETIME2 NOT NULL CONSTRAINT df_dm_db_stats_properties_SampleDate DEFAULT SYSUTCDATETIME(),
  -- Name of the statistics object; defaults to an empty string when not supplied.
  StatsName NVARCHAR(128) NOT NULL CONSTRAINT df_dm_db_stats_properties_StatsName DEFAULT ('')
)
GO
CREATE TABLE dbo.dm_db_stats_histogram(
  dm_db_stats_histogramID INT IDENTITY(1,1) NOT NULL constraint PK_dm_db_stats_histogram PRIMARY KEY CLUSTERED,
  -- Header snapshot this histogram step belongs to.
  dm_db_stats_propertiesID INT NOT NULL
    CONSTRAINT fk_dm_db_stats_properties FOREIGN KEY REFERENCES dbo.dm_db_stats_properties(dm_db_stats_propertiesID),
  object_id int NOT NULL,
  stats_id  int NOT NULL,
  step_number   int NOT NULL,
  -- Nullable: the upper bound of a histogram step can itself be NULL.
  range_high_key    sql_variant NULL,
  range_rows    real    NOT NULL,
  equal_rows    real    NOT NULL,
  distinct_range_rows   bigint  NOT NULL,
  average_range_rows    REAL NOT NULL
)
GO
Run Code Online (Sandbox Code Playgroud)

这是我用来记录统计数据的代码:

-- Captures a snapshot of every statistics object on every user table in the
-- current database, using DBCC SHOW_STATISTICS (works on versions that lack the
-- statistics DMVs).
--
-- For each table a dynamic batch is generated with one section per statistics
-- object. Each section:
--   1. loads the STAT_HEADER output into #Stat_Header,
--   2. inserts a row into dbo.dm_db_stats_properties unless a row for the same
--      database / object / stats / last-updated value is already stored,
--   3. if a header row was inserted, loads the HISTOGRAM output into #Histogram
--      and copies the steps into dbo.dm_db_stats_histogram.
--
-- Limitations visible in this script:
--   * modification_counter is always stored as 0 (not available from DBCC here).
--   * persisted_sample_percent is left out of the #Stat_Header insert list, so it
--     is always stored as NULL by this path.
--   * NOTE(review): schema, table and statistics names are concatenated into the
--     dynamic SQL unescaped; names containing quotes will break the generated
--     batch. Run only against databases whose object names you control.
--   * Depends on dbo.LongPrint (defined elsewhere) to print the first generated batch.
SET NOCOUNT ON

-- Staging tables for the DBCC output. Dropped only when they actually exist,
-- instead of swallowing every error in an empty CATCH block.
IF OBJECT_ID('tempdb..#Stat_Header') IS NOT NULL
  DROP TABLE #Stat_Header
CREATE TABLE #Stat_Header (Name sysname, Updated DATETIME, Rows BIGINT, Rows_Sampled BIGINT, Steps SMALLINT, Density REAL, AverageKeyLength INT, StringIndex varchar(10)
, FilterExpression varchar(8000), unfiltered_rows bigint, persisted_sample_percent float)

IF OBJECT_ID('tempdb..#Histogram') IS NOT NULL
  DROP TABLE #Histogram
-- Step_Number is an identity so the steps are numbered in the order DBCC returns them;
-- the generated batch truncates this table per statistics object, which restarts the numbering.
CREATE TABLE #Histogram (Step_Number INT IDENTITY(1,1), range_high_key SQL_VARIANT, range_rows REAL NOT NULL, equal_rows    REAL NOT NULL, distinct_range_rows BIGINT NOT NULL, average_range_rows REAL NOT NULL)


DECLARE TableCursor CURSOR LOCAL STATIC FOR 
    SELECT t.name AS TableName, sc.name AS SchemaName
    FROM sys.tables t 
    INNER JOIN sys.schemas sc ON sc.schema_id = t.schema_id
    ORDER BY sc.name, t.name

-- sysname matches the type of sys.tables.name / sys.schemas.name, so long or
-- Unicode names are not truncated or mangled.
DECLARE @sql NVARCHAR(MAX) = '', @TableName sysname, @SchemaName sysname, @loopCounter INT =0
SELECT GETDATE() AS StartDate
OPEN TableCursor
WHILE 1 =1 BEGIN
    FETCH TableCursor INTO @TableName, @SchemaName
    IF @@fetch_status <> 0 BREAK

    SELECT @sql = 'declare @Scope_Identity int = 0, @RowCount int
    SET NOCOUNT ON' + CHAR(13)
    -- Appends one section per statistics object of the current table.
    -- The de-duplication join compares against the same coalesce() value that is
    -- stored in last_updated and is scoped to the current database, so a
    -- statistics object with a NULL "Updated" header is not re-inserted on every run.
    SELECT @sql += '
    TRUNCATE TABLE #Stat_Header
    TRUNCATE TABLE #Histogram 
    INSERT INTO #Stat_Header(Name, Updated, Rows, Rows_Sampled, Steps, Density, AverageKeyLength, StringIndex, FilterExpression, unfiltered_rows/*, persisted_sample_percent*/)
    exec (''DBCC SHOW_STATISTICS ("' + @SchemaName + '.' + @TableName + '", "' + s.name +'") with STAT_HEADER'')
    INSERT INTO dbo.dm_db_stats_properties(databaseid, object_id, stats_id, last_updated, rows, rows_sampled, steps, unfiltered_rows, modification_counter, persisted_sample_percent, SampleDate, StatsName)
    SELECT db_id(), ' + LTRIM(t.object_id) + ', ' + LTRIM(s.stats_id) +', coalesce(sh.Updated, ''2000-01-01''), isnull(sh.rows,0), isnull(sh.Rows_Sampled,0), isnull(sh.steps,0), isnull(sh.unfiltered_rows,0), 0, sh.persisted_sample_percent, cast(''' + LTRIM(SYSUTCDATETIME()) + ''' as datetime2(7)), ''' + s.name + '''
    FROM #Stat_Header sh
    LEFT JOIN dbo.dm_db_stats_properties sp ON sp.DatabaseId=db_id() AND sp.object_id=' + LTRIM(t.object_id) + ' AND sp.stats_id=' + LTRIM(s.stats_id) + ' AND sp.last_updated=coalesce(sh.Updated, ''2000-01-01'')
    WHERE sp.dm_db_stats_propertiesID IS NULL
    SELECT @Scope_Identity = SCOPE_IDENTITY(), @RowCount=@@ROWCOUNT
    IF @RowCount>0 BEGIN 
        --raiserror (''here'', 10, 1) with nowait
        INSERT INTO #Histogram(range_high_key, range_rows, equal_rows, distinct_range_rows, average_range_rows)
        exec (''DBCC SHOW_STATISTICS ("' + @SchemaName + '.' + @TableName + '", "' + s.name +'") with HISTOGRAM'')
        INSERT INTO dbo.dm_db_stats_histogram(dm_db_stats_propertiesID, object_id, stats_id, step_number, range_high_key, range_rows, equal_rows, distinct_range_rows, average_range_rows)
        SELECT @Scope_Identity, ' + LTRIM(t.object_id) + ', ' + LTRIM(s.stats_id) +', h.Step_Number, h.range_high_key, h.range_rows, h.equal_rows, h.distinct_range_rows, h.average_range_rows
        FROM #Histogram h
    END 
    raiserror (''table = ' + @TableName + ', ' + s.name + ', rc= %i '', 10, 1, @RowCount) with nowait
    waitfor delay ''00:00:01''
    '
    FROM sys.stats AS s
    INNER JOIN sys.tables t ON t.object_id = s.object_id
    INNER JOIN sys.schemas sc ON sc.schema_id = t.schema_id
    WHERE t.name=@TableName
    AND sc.name = @SchemaName
    -- Print only the first generated batch, as a sample for debugging.
    IF @loopCounter < 1 EXEC dbo.LongPrint @String=@sql
    SET @loopCounter +=1
    EXEC sp_executesql @sql
    --BREAK
END
-- Close the cursor before releasing it.
CLOSE TableCursor
DEALLOCATE TableCursor
SELECT GETDATE() AS StopDate
Run Code Online (Sandbox Code Playgroud)

我还有一个在 SQL Server 2016+17 上使用新 DMV 的解决方案

-- Captures a statistics snapshot for every database (except those in
-- @exclude_list) into the shared tables master.dbo.dm_db_stats_properties and
-- master.dbo.dm_db_stats_histogram, using the statistics DMVs
-- sys.dm_db_stats_properties and sys.dm_db_stats_histogram.
--
-- Per database the command:
--   1. inserts one header row per statistics object whose last_updated value is
--      not stored yet for that database (result set r1 = rows inserted),
--   2. inserts the histogram steps for exactly the header rows written in step 1,
--      identified by the shared @SampleDate and the current database id
--      (result set r2 = rows inserted).
--
-- Because the target tables hold rows for all databases, both the de-duplication
-- join and the histogram filter are restricted to DatabaseID = db_id(); object_id
-- and stats_id values are only unique within a single database.
--
-- NOTE(review): sp_foreachdb is not defined in this script; presumably the
-- community replacement for sp_MSforeachdb - confirm it is installed.
exec sp_foreachdb @command = N'
use ?
DECLARE @SampleDate DATETIME2 = SYSUTCDATETIME()

  INSERT INTO master.dbo.dm_db_stats_properties(DatabaseID, object_id, stats_id, last_updated, rows, rows_sampled, steps, unfiltered_rows, modification_counter, persisted_sample_percent, SampleDate, StatsName)
  SELECT db_id() as DatabaseID, s.object_id, s.stats_id, sp.last_updated, sp.rows, sp.rows_sampled, sp.steps, sp.unfiltered_rows, sp.modification_counter, sp.persisted_sample_percent, @SampleDate, s.name
  FROM ?.sys.stats AS s
  INNER JOIN ?.sys.tables t ON t.object_id = s.object_id
  INNER JOIN ?.sys.schemas sc ON sc.schema_id = t.schema_id
  CROSS APPLY ?.sys.dm_db_stats_properties(s.object_id, s.stats_id) AS sp
  LEFT JOIN master.dbo.dm_db_stats_properties T1 ON T1.DatabaseID = db_id() AND T1.object_id = s.object_id AND T1.stats_id = s.stats_id AND T1.last_updated=sp.last_updated
  WHERE sp.last_updated IS NOT NULL
  AND T1.last_updated IS NULL
  select @@rowcount as r1

INSERT INTO master.dbo.dm_db_stats_histogram(dm_db_stats_propertiesID, object_id, stats_id, step_number, range_high_key, range_rows, equal_rows, distinct_range_rows, average_range_rows)
SELECT sp.dm_db_stats_propertiesID, sp.object_id, sp.stats_id, hist.step_number, hist.range_high_key, hist.range_rows, hist.equal_rows, hist.distinct_range_rows, hist.average_range_rows
FROM master.dbo.dm_db_stats_properties sp
CROSS APPLY ?.sys.dm_db_stats_histogram(sp.[object_id], sp.stats_id) AS hist
WHERE sp.SampleDate = @SampleDate
AND sp.DatabaseID = db_id()
  select @@rowcount as r2

', @exclude_list='tempdb, model', @print_dbname=1
Run Code Online (Sandbox Code Playgroud)


我真正的问题

现在,我如何根据我收集的数据编写查询,以显示
a) 哪些表、索引、列在我的采样期间会(或不会)发生很大变化?
b) 哪些表、索引、列受益于 WITH FULLSCAN 命令?

Joe*_*ish 6

b) 哪些表、索引、列受益于 WITH FULLSCAN 命令?

在我看来,您没有收集正确的数据来回答这个问题。如果您正在寻找仅通过分析数据库统计信息就可以进行的改进,我只能想到使用抽样统计信息而不是 FULLSCAN 统计信息可能导致的两个查询性能问题:

  1. 密度至少相差一个数量级。

    某些数据分布不适合 SQL Server 在将采样数据转换为直方图时做出的某些假设。在这些情况下,您最终得到的密度可能会偏差 10 倍、100 倍甚至更多。这可能会导致在统计对象中使用密度向量的查询出现性能问题。

    您可以通过保存所有采样统计信息的密度信息、使用 FULLSCAN 收集所有相关列的统计信息以及比较两个结果集之间的密度来搜索可能的问题。任何太不准确的东西都可能会从完整收集统计数据中获益。

  2. 该查询容易受到升序键问题的影响

    您已将 SQL Server 2008 列为标记,因此这可能仍与您相关。考虑一列存储插入行时的日期时间。如果您的查询在该列上进行过滤以查找最近的数据,则它们可能正在搜索直方图之外的数据。使用旧版 CE,您最终可能会得到非常低的基数估计,并可能导致查询性能问题。

    这可以通过 FULLSCAN 统计数据来解决,尽管我觉得这有点矫枉过正。您可以多次收集具有相关数据类型的所有统计信息的完整统计信息(希望无需担心 VARCHAR 的升序键),并查看最大高键值如何变化。

对于上述两个问题,我想不出一种仅通过查看抽样统计数据以编程方式找到它们的方法。这就是为什么我说你没有收集正确的数据来回答你的问题。

如果你关心我的意见,真正减少统计信息维护的方法是:分析工作负载中性能不够好的查询,仔细分析根本原因以找出统计信息问题何时起作用,确定统计信息问题的确切类型,最后相应调整统计信息维护工作。