针对非唯一聚簇索引的搜索的逻辑读取

Mar*_*ith 6 sql-server indexing performance

对于表定义

-- Demo table: an integer key plus a fixed-width 1000-character filler column.
-- No constraints are declared here, so both columns take the default nullability.
CREATE TABLE Accounts (
    AccountID INT,
    Filler    CHAR(1000)
);
Run Code Online (Sandbox Code Playgroud)

该表包含 21 行(AccountID 的取值为 4、6、7,每个值各有 7 行)。

它有1个根页面和4个叶子页面

index_depth page_count           index_level
----------- -------------------- -----------
2           4                    0
2           1                    1
Run Code Online (Sandbox Code Playgroud)

根页看起来像

FileId      PageId      ROW         LEVEL       ChildFieldId ChildPageId AccountId (KEY) UNIQUIFIER (KEY) KeyHashValue
----------- ----------- ----------- ----------- ------------ ----------- --------------- ---------------- ------------------------------
1           121         0           1           1            119         NULL            NULL             NULL
1           121         1           1           1            151         6               0                NULL
1           121         2           1           1            175         6               3                NULL
1           121         3           1           1            215         7               1                NULL
Run Code Online (Sandbox Code Playgroud)

各 AccountID 值的记录在这些页面上的实际分布如下

AccountID   page_id     Num
----------- ----------- -----------
4           119         7
6           151         3
6           175         4
7           175         1
7           215         6
Run Code Online (Sandbox Code Playgroud)

查询

-- Return the key of every row whose AccountID is 4, 6 or 7.
SELECT
    AccountID
FROM Accounts
WHERE AccountID IN (4, 6, 7);
Run Code Online (Sandbox Code Playgroud)

提供以下IO统计信息

Table 'Accounts'. Scan count 3, logical reads 13
Run Code Online (Sandbox Code Playgroud)

为什么?

我原以为每次查找(seek)都会先定位到可能包含该值的第一个页面,然后(如有必要)沿着页面链表继续向后读取,直到遇到第一条不等于查找值的行为止。

但是,这样加起来总共只有 10 次页面访问

4)  Root Page -> Page 119 -> Page 151             (Page 151 Contains a 6 so should stop)
6)  Root Page -> Page 119 -> Page 151 -> Page 175 (Page 175 Contains a 7 so should stop)
7)  Root Page -> Page 175 -> Page 215             (No more pages)      
Run Code Online (Sandbox Code Playgroud)

那么多出来的 3 次读取是什么原因造成的呢?

完整的脚本来重现

-- Repro script: builds the demo table, captures STATISTICS IO for the
-- IN (4,6,7) query, then reports the index shape, the per-page row
-- distribution and the contents of the level-1 index page(s).
USE tempdb;

SET NOCOUNT ON;

CREATE TABLE Accounts (
    AccountID INT,
    Filler    CHAR(1000)
);

-- Clustered index on AccountID; it is not declared UNIQUE.
CREATE CLUSTERED INDEX ix
    ON Accounts (AccountID);

-- 3 key values (4, 6, 7) cross-joined with 7 rows = 21 rows inserted.
INSERT INTO Accounts (AccountID)
SELECT KeyVals.AccountValue
FROM (
    SELECT 4
    UNION ALL
    SELECT 6
    UNION ALL
    SELECT 7
) AS KeyVals (AccountValue)
CROSS JOIN (
    SELECT TOP (7) 1
    FROM master..spt_values
) AS Copies (Dummy);

DECLARE @AccountID INT;

-- The query under test; its result is assigned to a variable rather than
-- returned as a result set.
SET STATISTICS IO ON;

SELECT @AccountID = AccountID
FROM Accounts
WHERE AccountID IN (4, 6, 7);

SET STATISTICS IO OFF;

-- Depth and page count per index level.
-- NOTE(review): the first argument (2) is a hard-coded database_id,
-- presumably tempdb to match the USE above -- confirm.
SELECT
    index_depth,
    page_count,
    index_level
FROM sys.dm_db_index_physical_stats(2, OBJECT_ID('Accounts'), DEFAULT, DEFAULT, 'DETAILED');

-- Number of rows per AccountID on each physical page.
SELECT
    AccountID,
    Loc.page_id,
    COUNT(*) AS Num
FROM Accounts
CROSS APPLY sys.fn_PhysLocCracker(%%physloc%%) AS Loc
GROUP BY AccountID, Loc.page_id
ORDER BY AccountID, Loc.page_id;

-- Receives the rowset produced by DBCC IND. The two page-id columns are
-- VARCHAR so they can be concatenated directly into the dynamic SQL below.
DECLARE @index_info TABLE (
    PageFID         VARCHAR(10),
    PagePID         VARCHAR(10),
    IAMFID          TINYINT,
    IAMPID          INT,
    ObjectID        INT,
    IndexID         TINYINT,
    PartitionNumber TINYINT,
    PartitionID     BIGINT,
    iam_chain_type  VARCHAR(30),
    PageType        TINYINT,
    IndexLevel      TINYINT,
    NextPageFID     TINYINT,
    NextPagePID     INT,
    PrevPageFID     TINYINT,
    PrevPagePID     INT,
    PRIMARY KEY (PageFID, PagePID)
);

INSERT INTO @index_info
EXEC ('DBCC IND ( tempdb, Accounts, -1)');

-- Build one DBCC PAGE call per page found at index level 1, wrapped in
-- TRACEON/TRACEOFF for trace flag 3604.
DECLARE @DynSQL NVARCHAR(MAX) = 'DBCC TRACEON (3604);';

SELECT @DynSQL = @DynSQL + '
DBCC PAGE(tempdb, ' + PageFID + ', ' + PagePID + ', 3); '
FROM @index_info
WHERE IndexLevel = 1;

SET @DynSQL += '
DBCC TRACEOFF(3604); ';

-- Holds the DBCC PAGE rowset for the level-1 page(s); columns are matched
-- by position by INSERT ... EXEC.
CREATE TABLE #index_l1_info (
    FileId             INT,
    PageId             INT,
    ROW                INT,
    LEVEL              INT,
    ChildFieldId       INT,
    ChildPageId        INT,
    [AccountId (KEY)]  INT,
    [UNIQUIFIER (KEY)] INT,
    KeyHashValue       VARCHAR(30)
);

INSERT INTO #index_l1_info
EXEC (@DynSQL);

SELECT
    FileId,
    PageId,
    [ROW],
    [LEVEL],
    ChildFieldId,
    ChildPageId,
    [AccountId (KEY)],
    [UNIQUIFIER (KEY)],
    KeyHashValue
FROM #index_l1_info;

-- Clean up the objects created by this script.
DROP TABLE #index_l1_info;
DROP TABLE Accounts;
Run Code Online (Sandbox Code Playgroud)

Mar*_*ith 5

把结论整理成一个正式的答案,而不是只留在评论区的讨论里……

额外的读取来自预读(read-ahead)机制。该机制会扫描叶级页面的父页面,以便在需要时发出异步 IO,把叶级页面提前读入缓冲区缓存,使范围查找到达这些页面时它们已经就绪。

可以使用跟踪标志 652 来禁用该机制(服务器范围内)并验证读取次数现在是否如预期的那样正好为 10。