Mon*_*RPG 1 performance sql-server sql-server-2014 query-performance
这是我的完整表格:
CREATE TABLE [dbo].[tblCrawlUrls](
[cl_IdUrl] [int] IDENTITY(1,1) NOT NULL,
[cl_CrawlNormalizedUrl] [nvarchar](200) NOT NULL,
[cl_RooSiteId] [smallint] NOT NULL,
[cl_ExploreDate] [datetime] NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_ExploreDate] DEFAULT (sysutcdatetime()),
[cl_LastCrawlDate] [datetime] NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_LastCrawlDate] DEFAULT ('2000-08-11 15:18:47.407'),
[cl_CrawlSource] [nvarchar](max) NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_CrawlSource] DEFAULT ('null'),
[cl_CrawlOrgUrl] [nvarchar](200) NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_CrawlOrgUrl] DEFAULT ('null'),
[cl_ExploredURL] [nvarchar](200) NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_ExploredURL] DEFAULT ('null'),
[cl_Ignored_By_Containing_Word] [bit] NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_Ignored_By_Containing_Word] DEFAULT ((0)),
[cl_CrawlFailedTimes] [int] NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_CrawlFailedTimes] DEFAULT ((0)),
[cl_TotalCrawlTimes] [int] NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_TotalCrawlTimes] DEFAULT ((0)),
[cl_UpdatedTimes] [int] NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_UpdatedTimes] DEFAULT ((0)),
[cl_DuplicateUrl_ByCanonical] [bit] NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_DuplicateUrl_ByCanonical] DEFAULT ((0)),
[cl_PageProcessed] [bit] NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_PageProcessed] DEFAULT ((0)),
[cl_LastProcessDate] [datetime] NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_LastProcessDate] DEFAULT ('2000-08-11 15:18:47.407'),
[cl_PossibleProductPage] [bit] NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_PossibleProductPage] DEFAULT ((0)),
[cl_CertainlyNotProductPage] [bit] NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_CertainlyNotProductPage] DEFAULT ((0)),
[cl_IsProductPage] [bit] NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_IsProductPage] DEFAULT ((0)),
[cl_Determined_Not_A_Product_Page] [bit] NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_NotProduct_Page] DEFAULT ((0)),
[cl_FreeCargo] [bit] NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_FreeCargo] DEFAULT ((0)),
[cl_ProductPrice_TL] [int] NOT NULL CONSTRAINT [DF_tblCrawlUrls_cl_ProductPrice] DEFAULT ((0)),
[cl_ProductCode] [nvarchar](200) NULL,
[cl_ProductImageLink] [nvarchar](200) NULL,
[cl_ProductIdCode] [nvarchar](200) NULL,
[cl_ProductCategoriesAsText] [nvarchar](200) NULL,
[cl_ProductDetailedExplanation] [nvarchar](max) NULL,
[cl_ProductFeatures_Wrapped] [nvarchar](max) NULL,
CONSTRAINT [PK_tblCrawlUrls] PRIMARY KEY CLUSTERED
(
[cl_IdUrl] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 85) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
CREATE TABLE [dbo].[tblRootSites](
[cl_RootSiteId] [smallint] NOT NULL,
[cl_SiteRootUrl] [nvarchar](200) NOT NULL,
[cl_Disabled] [bit] NOT NULL CONSTRAINT [DF_tblRootSites_Disabled] DEFAULT ((0)),
[cl_AlexaRank_TR] [int] NOT NULL CONSTRAINT [DF_tblRootSites_cl_AlexaRank] DEFAULT ((0)),
[cl_RegisterTime] [datetime] NOT NULL CONSTRAINT [DF_tblRootSites_cl_RegisterTime] DEFAULT (sysutcdatetime()),
[cl_PriceDelimeter] [varchar](1) NOT NULL CONSTRAINT [DF_tblRootSites_cl_PriceDelimeter] DEFAULT ('.'),
[cl_PriceIgnoreDelimeter] [varchar](1) NOT NULL CONSTRAINT [DF_tblRootSites_cl_PriceIgnoreDelimeter] DEFAULT (','),
[cl_CertainProductPageDefiner] [nvarchar](200) NOT NULL CONSTRAINT [DF_tblRootSites_cl_CertainProductPageDefiner] DEFAULT ('null'),
[cl_ProductCategoryListing_Priority] [smallint] NOT NULL CONSTRAINT [DF_tblRootSites_cl_ProductCategoryListing_Priority] DEFAULT ((0)),
[cl_Ignore_Words_From_Category] [nvarchar](200) NOT NULL CONSTRAINT [DF_tblRootSites_cl_Ignore_From_Category] DEFAULT ('null'),
[cl_Ignore_Words_From_Urls] [nvarchar](200) NOT NULL CONSTRAINT [DF_tblRootSites_cl_Ignore_Urls_Parameters] DEFAULT ('null'),
[cl_Use_Last_Category_As_A_Product_Code] [bit] NOT NULL CONSTRAINT [DF_tblRootSites_cl_Use_Last_Category_As_A_Product_Code] DEFAULT ((0)),
[cl_Use_Custom_Character_Encoding] [int] NOT NULL CONSTRAINT [DF_tblRootSites_cl_Use_Custom_Character_Encoding] DEFAULT ((0)),
[cl_IgnorePages_Words] [nvarchar](200) NOT NULL CONSTRAINT [DF_tblRootSites_cl_IgnorePages_Words] DEFAULT ('null'),
[cl_IgnoreLastCategory] [bit] NOT NULL CONSTRAINT [DF_tblRootSites_cl_IgnoreLastCategory] DEFAULT ((0)),
CONSTRAINT [PK_tblRootSites] PRIMARY KEY CLUSTERED
(
[cl_RootSiteId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 85) ON [PRIMARY]
) ON [PRIMARY]
Run Code Online (Sandbox Code Playgroud)
这是我的 SQL 查询:
SELECT TOP 1000 cl_idurl,
cl_roositeid,
cl_crawlsource,
cl_crawlorgurl
FROM tblcrawlurls
WHERE cl_pageprocessed = 0
AND cl_totalcrawltimes > 0
AND cl_certainlynotproductpage = 0
AND cl_duplicateurl_bycanonical = 0
AND cl_roositeid IN (SELECT cl_rootsiteid
FROM tblrootsites
WHERE cl_disabled = 0)
ORDER BY cl_lastprocessdate ASC
Run Code Online (Sandbox Code Playgroud)
这是我假设会使用的索引,但它没有被使用。
CREATE NONCLUSTERED INDEX [TCT-PP-LPD-CNPP-DUBC] ON [dbo].[tblCrawlUrls]
(
[cl_TotalCrawlTimes] ASC,
[cl_PageProcessed] ASC,
[cl_LastProcessDate] ASC,
[cl_CertainlyNotProductPage] ASC,
[cl_DuplicateUrl_ByCanonical] ASC
)
INCLUDE ( [cl_IdUrl],
[cl_RooSiteId],
[cl_CrawlOrgUrl]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 85)
Run Code Online (Sandbox Code Playgroud)
在我的查询中,我将这些用作限制(在 where 子句中)
cl_pageprocessed
cl_totalcrawltimes
cl_certainlynotproductpage
cl_duplicateurl_bycanonical
Run Code Online (Sandbox Code Playgroud)
我假设使用包含这些列的索引来确定可能的主键 ID 应该比总聚集索引扫描更快。但是 SQL Server 并没有这样做。
所以我的逻辑不正确,这就是我想要学习的。为什么 SQL Server 选择不使用我的索引?不使用我的索引意味着我的数据库和硬盘上有额外的不必要的负载。
下面是执行计划:
小智 5
索引中的第一列是 cl_totalcrawltimes。在 WHERE 子句中,您使用此列来定义较低的范围 (cl_totalcrawltimes > 0)。
根据数据分布,这很可能会导致索引扫描而不是索引查找。此外, cl_crawlsource 不是索引的一部分(最好是包含部分),因此无论如何这都需要聚集索引键查找。
因此,SQL Server 会忽略索引,因为执行聚集索引扫描似乎更有效。
为获得最佳结果,使您查询的那些列与非聚集索引的第一列完全匹配。(下一列应该是不等式列还是按列排序取决于优化器喜欢什么,如果有的话。)