我有一个 800 GB 的表,我需要按年对其进行分区
这是当前的表
-- Current, unpartitioned definition of dbo.MessageInbox.
-- The table is clustered on the identity column Id through PK_Inbox, and both
-- the row data and the VARCHAR(MAX) data are placed on the PRIMARY filegroup.
CREATE TABLE [dbo].[MessageInbox]
(
    [Id]           INT          IDENTITY(1, 1) NOT FOR REPLICATION NOT NULL,
    [Subject]      VARCHAR(250) NULL,
    [MessageFrom]  VARCHAR(50)  NULL,
    [MessageText]  VARCHAR(MAX) NULL,
    [DateReceived] DATETIME     NOT NULL,
    [DateCreated]  DATETIME     NOT NULL,
    [ProfileId]    INT          NOT NULL,
    [IsRead]       BIT          NULL,
    [InstanceId]   INT          NULL,
    [msgType]      VARCHAR(25)  NULL,
    [Tags]         VARCHAR(100) NULL,
    [excerpt]      VARCHAR(500) NULL,
    CONSTRAINT [PK_Inbox] PRIMARY KEY CLUSTERED ([Id] ASC)
        WITH
        (
            PAD_INDEX              = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY         = OFF,
            ALLOW_ROW_LOCKS        = ON,
            ALLOW_PAGE_LOCKS       = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY]
TEXTIMAGE_ON [PRIMARY];
GO
Run Code Online (Sandbox Code Playgroud)
下面是我进行分区的方式:
-- Yearly partition function over a DATETIME key.
-- RANGE RIGHT means each boundary value belongs to the partition on its right:
--   partition 1      : key <  2015-01-01
--   partitions 2..10 : one calendar year each, 2015 through 2023
--   partition 11     : key >= 2024-01-01 (open-ended, no upper boundary)
-- Ten boundary values therefore produce eleven partitions.
CREATE PARTITION FUNCTION [PartitioningByYear] (DATETIME)
AS RANGE RIGHT
FOR VALUES
(
    '20150101', '20160101', '20170101', '20180101', '20190101',
    '20200101', '20210101', '20220101', '20230101', '20240101'
);
---DROP PARTITION FUNCTION [PartitioningByYear];
-- Assigns the eleven partitions of PartitioningByYear to filegroups, in
-- partition order. The ranges noted below follow from the RANGE RIGHT
-- boundaries of that function.
CREATE PARTITION SCHEME PartitionByYear
AS PARTITION PartitioningByYear
TO
(
    Year2014,   -- partition 1 : everything before 2015-01-01
    Year2015,   -- partition 2 : 2015
    Year2016,   -- partition 3 : 2016
    Year2017,   -- partition 4 : 2017
    Year2018,   -- partition 5 : 2018
    Year2019,   -- partition 6 : 2019
    Year2020,   -- partition 7 : 2020
    Year2021,   -- partition 8 : 2021
    Year2022,   -- partition 9 : 2022
    Year2023,   -- partition 10: 2023
    Year2024    -- partition 11: 2024-01-01 and later
);
-- Partitioned version of dbo.MessageInbox, placed on the PartitionByYear
-- scheme with DateCreated as the partitioning column. No key or index is
-- declared in this statement, so the table is created as a heap.
CREATE TABLE [dbo].[MessageInbox]
(
    [Id]           INT          IDENTITY(1, 1) NOT FOR REPLICATION NOT NULL,
    [Subject]      VARCHAR(250) NULL,
    [MessageFrom]  VARCHAR(50)  NULL,
    [MessageText]  VARCHAR(MAX) NULL,
    [DateReceived] DATETIME     NOT NULL,
    [DateCreated]  DATETIME     NOT NULL,
    [ProfileId]    INT          NOT NULL,
    [IsRead]       BIT          NULL,
    [InstanceId]   INT          NULL,
    [msgType]      VARCHAR(25)  NULL,
    [Tags]         VARCHAR(100) NULL,
    [excerpt]      VARCHAR(500) NULL
)
ON PartitionByYear (DateCreated);
-- Cluster the table on its partitioning column. The index is not unique and
-- names no filegroup or partition scheme of its own.
CREATE CLUSTERED INDEX CI_MessageInbox1
    ON dbo.MessageInbox (DateCreated);
GO

-- Lookup index on Id. It is declared without UNIQUE, so it speeds up searches
-- by Id but does not enforce that Id values are distinct.
CREATE NONCLUSTERED INDEX IX_messageinbox_id
    ON dbo.MessageInbox (Id);
Run Code Online (Sandbox Code Playgroud)
我的问题是
1- 这是创建分区表的最佳方式吗?
2- 我对没有将 ID 作为主键感到困惑,有没有办法将它添加到主键?
3- 将 DateCreated 作为聚集索引的键,这不会降低性能,并为每个查询增加额外的键查找吗?
我想对表进行分区,因为如果我需要存档/删除旧的年份会更容易。而且我可以在分区上创建索引/维护,而不是在整个表上创建一个索引。
主键不需要与聚集键(聚集索引的键)相同。创建主键时默认会将其创建为聚集索引,但这并不是必需的。
试试这个CREATE TABLE语句:
-- Partitioned dbo.MessageInbox: clustered on DateCreated, with a separate
-- nonclustered primary key.
--
-- Why the key is (Id, DateCreated) and not Id alone:
--   * A constraint index declared without an ON clause is stored on the same
--     partition scheme as the table.
--   * A unique index stored on a partition scheme must contain the
--     partitioning column in its key; a primary key on Id alone is rejected
--     (Msg 1908).
-- Including DateCreated keeps the index aligned with the table, which
-- partition SWITCH requires. Trade-off: uniqueness is enforced on the pair
-- (Id, DateCreated), not on Id by itself; Id values still come from IDENTITY.
-- Alternative: keep the key on Id alone and append ON [PRIMARY] to the
-- constraint. That index is non-aligned and has to be dropped or disabled
-- before a partition can be switched out.
CREATE TABLE [dbo].[MessageInbox]
(
    [Id] [INT] NOT NULL
        IDENTITY(1, 1)
        NOT FOR REPLICATION,
    [Subject] [VARCHAR](250) NULL,
    [MessageFrom] [VARCHAR](50) NULL,
    [MessageText] [VARCHAR](MAX) NULL,
    [DateReceived] [DATETIME] NOT NULL,
    -- Partitioning column; the inline index makes it the clustering key.
    [DateCreated] [DATETIME] NOT NULL
        INDEX MessageInbox_cx CLUSTERED,
    [ProfileId] [INT] NOT NULL,
    [IsRead] [BIT] NULL,
    [InstanceId] [INT] NULL,
    [msgType] [VARCHAR](25) NULL,
    [Tags] [VARCHAR](100) NULL,
    [excerpt] [VARCHAR](500) NULL,
    -- Explicitly named so that errors and later migrations can refer to it.
    CONSTRAINT [PK_MessageInbox]
        PRIMARY KEY NONCLUSTERED ([Id], [DateCreated])
) ON PartitionByYear (DateCreated);
Run Code Online (Sandbox Code Playgroud)
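这将使 [Id] 列成为主键,而 [DateCreated] 列成为聚集索引的键。请注意:该表按 [DateCreated] 分区,而存放在分区方案上(与分区对齐)的唯一索引(包括主键)的键必须包含分区列;因此主键要么同时包含 [DateCreated],要么通过 ON 子句显式放到普通文件组上成为非对齐索引(非对齐索引会妨碍分区切换)。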
在不太了解您的数据的情况下,我会说将数据压缩添加到您的表和非聚集索引可能会有所帮助。您可以通过在CREATE TABLE语句中添加以下内容来实现:
-- Partitioned dbo.MessageInbox with PAGE compression on both the clustered
-- index (that is, the table data) and the nonclustered primary key.
--
-- Why the key is (Id, DateCreated) and not Id alone:
--   * A constraint index declared without an ON clause is stored on the same
--     partition scheme as the table.
--   * A unique index stored on a partition scheme must contain the
--     partitioning column in its key; a primary key on Id alone is rejected
--     (Msg 1908).
-- Including DateCreated keeps the index aligned with the table, which
-- partition SWITCH requires. Trade-off: uniqueness is enforced on the pair
-- (Id, DateCreated), not on Id by itself; Id values still come from IDENTITY.
-- Alternative: keep the key on Id alone and append ON [PRIMARY] to the
-- constraint. That index is non-aligned and has to be dropped or disabled
-- before a partition can be switched out.
CREATE TABLE [dbo].[MessageInbox]
(
    [Id] [INT] NOT NULL
        IDENTITY(1, 1)
        NOT FOR REPLICATION,
    [Subject] [VARCHAR](250) NULL,
    [MessageFrom] [VARCHAR](50) NULL,
    [MessageText] [VARCHAR](MAX) NULL,
    [DateReceived] [DATETIME] NOT NULL,
    -- Partitioning column; the inline index makes it the clustering key, so
    -- its compression setting applies to the table's data pages.
    [DateCreated] [DATETIME] NOT NULL
        INDEX MessageInbox_cx CLUSTERED
        WITH (DATA_COMPRESSION = PAGE),
    [ProfileId] [INT] NOT NULL,
    [IsRead] [BIT] NULL,
    [InstanceId] [INT] NULL,
    [msgType] [VARCHAR](25) NULL,
    [Tags] [VARCHAR](100) NULL,
    [excerpt] [VARCHAR](500) NULL,
    -- Explicitly named so that errors and later migrations can refer to it.
    CONSTRAINT [PK_MessageInbox]
        PRIMARY KEY NONCLUSTERED ([Id], [DateCreated])
        WITH (DATA_COMPRESSION = PAGE)
) ON PartitionByYear (DateCreated);
Run Code Online (Sandbox Code Playgroud)