包含30亿条关系记录的大型数据库的SQL模式设计

4 sql performance

拿出你的极客精神来.你能解决这个问题吗?

我正在为SQL Server 2008 R2(不是Enterprise版)设计一个产品数据库,用于存储超过30,000种不同产品的定制产品配置.该数据库同一时间最多会有500个用户访问.

这是设计问题......

每个产品都有一系列零件(每个产品最多50个零件).
因此,如果我有30,000个产品,每个产品最多可以有50个零件,那就是150万个不同的产品到零件关系

...或者写成等式:

30,000(产品)x 50(零件)= 150万条产品到零件的记录.

…而如果…

每个零件最多可以有2000个饰面选项(饰面是油漆颜色).

注意:用户在运行时只会选择一种饰面.我需要存储的2000个饰面选项,是特定产品的特定零件所允许的选项.

因此,如果我有150万条不同的产品到零件的关系/记录,并且每个零件最多可以有2,000种饰面,那就是30亿条允许的"产品-零件-饰面"关系/记录

...或者写成等式:

150万(产品到零件的记录)x 2,000(饰面)= 30亿条"产品-零件-饰面"记录.

如何设计此数据库,才能对特定产品执行快速高效的查询,返回其零件清单以及每个零件所有允许的饰面,而又不需要存储30亿条"产品-零件-饰面"记录?读取速度比写入速度更重要.

如果您有大型数据库的经验,请发表您的想法/建议.

谢谢!

Rem*_*anu 6

这有什么极具挑战性的?如果说有一件事是关系数据库所擅长的,那恰恰就是你所描述的问题:3张表和2个多对多关系.只有对所有表做失控的完全笛卡尔积连接,才会出现"30亿"这个数字.只需进行基本的规范化设计:

-- sqlcmd-mode script (run with sqlcmd, or SSMS with SQLCMD mode enabled).
-- Rebuilds the demo database from scratch; the variables below name the
-- database and size the generated data set.
:setvar dbname test
-- Row counts for the Products, Parts and Finishes tables.
:setvar PRODUCTSCOUNT 30000
:setvar PARTSCOUNT 5000
:setvar FINISHESCOUNT 2000
-- Upper bounds: parts linked to one product, finishes linked to one part.
:setvar PRODUCTSPARTS 50
:setvar PARTFINISHES 1

use master;
set nocount on;
go

-- Clear a transaction left open on this connection by an earlier,
-- interrupted run. Checking @@trancount first avoids the error that a bare
-- rollback raises when no transaction is open.
if @@trancount > 0
    rollback;
go

-- From here on, any error aborts the script.
:on error exit

-- Drop an existing copy of the database; single_user with rollback immediate
-- disconnects other sessions so the drop cannot be blocked.
if db_id('$(dbname)') is not null
begin
    alter database [$(dbname)] set single_user with rollback immediate;
    drop database [$(dbname)];
end
go

-- Logical and physical file names are derived from the dbname variable, so
-- changing dbname no longer reuses the files of the default 'test' database.
-- With the default value the names are unchanged (test_data / test.mdf, ...).
create database [$(dbname)]
    on (name = [$(dbname)_data], filename='c:\temp\$(dbname).mdf', size = 10GB)
    log on (name = [$(dbname)_log], filename='c:\temp\$(dbname).ldf', size = 100MB);
go

use [$(dbname)];
go

-- Products: one row per product.
-- Product_Id is an identity column seeded at 0 with increment 1; the
-- population batches later in this script use 0-based counters as Product_Id.
create table Products
(
    Product_Id  int not null identity(0,1),
    Description varchar(256),
    primary key (Product_Id)
);
go

-- Parts: one row per distinct part, shared between products through
-- ProductParts. Part_Id is an identity column seeded at 0 with increment 1.
create table Parts
(
    Part_Id     int not null identity(0,1),
    Description varchar(256),
    primary key (Part_Id)
);

-- Finishes: one row per finish option. Finish_Id is a smallint identity
-- seeded at 0 with increment 1.
create table Finishes
(
    Finish_Id   smallint not null identity(0,1),
    Description varchar(256),
    primary key (Finish_Id)
);

-- ProductParts: many-to-many link between Products and Parts.
-- The composite primary key leads on Product_Id, so the parts of one product
-- are contiguous in key order.
-- NOTE(review): fk_products_parts_product says "products" where the sibling
-- constraints say "product"; the name is kept as-is in case it is referenced
-- elsewhere.
create table ProductParts
(
    Product_Id int not null,
    Part_Id    int not null,
    constraint pk_product_parts
        primary key (Product_Id, Part_Id),
    constraint fk_products_parts_product
        foreign key (Product_Id) references Products (Product_Id),
    constraint fk_product_parts_part
        foreign key (Part_Id) references Parts (Part_Id)
);

-- PartFinishes: many-to-many link between Parts and Finishes, i.e. the
-- finishes allowed for each part.
-- The composite primary key leads on Part_Id, so the finishes of one part
-- are contiguous in key order.
create table PartFinishes
(
    Part_Id   int      not null,
    Finish_Id smallint not null,
    constraint pk_part_finishes
        primary key (Part_Id, Finish_Id),
    constraint fk_part_finishes_part
        foreign key (Part_Id) references Parts (Part_Id),
    constraint fk_part_finishes_finish
        foreign key (Finish_Id) references Finishes (Finish_Id)
);
go

-- populate Products
-- Inserts PRODUCTSCOUNT rows described as 'Product <n>' (n counts from 0).
-- Work is committed every 1000 rows, so each transaction holds at most
-- 1000 inserts.
declare @cnt int = 0, @description varchar(256);
begin transaction;
while @cnt < $(PRODUCTSCOUNT)
begin
    set @description = 'Product ' + cast(@cnt as varchar(10));
    insert into Products (Description) values (@description);
    set @cnt += 1;
    if @cnt % 1000 = 0
    begin
        commit;
        -- Severity 0 makes this an informational message; with nowait sends
        -- it to the client immediately instead of leaving it buffered, so
        -- the progress is visible while the loop is still running.
        raiserror (N'Inserted %d products', 0,1, @cnt) with nowait;
        begin transaction;
    end
end
-- Commits the trailing partial batch (or the empty transaction opened after
-- the last full batch).
commit;
raiserror (N'Done. %d products', 0,1, @cnt) with nowait;
go

-- populate Parts
-- Inserts PARTSCOUNT rows described as 'Part <n>' (n counts from 0).
-- Work is committed every 1000 rows, so each transaction holds at most
-- 1000 inserts.
declare @cnt int = 0, @description varchar(256);
begin transaction;
while @cnt < $(PARTSCOUNT)
begin
    set @description = 'Part ' + cast(@cnt as varchar(10));
    insert into Parts (Description) values (@description);
    set @cnt += 1;
    if @cnt % 1000 = 0
    begin
        commit;
        -- Severity 0 makes this an informational message; with nowait sends
        -- it to the client immediately instead of leaving it buffered, so
        -- the progress is visible while the loop is still running.
        raiserror (N'Inserted %d parts', 0,1, @cnt) with nowait;
        begin transaction;
    end
end
-- Commits the trailing partial batch (or the empty transaction opened after
-- the last full batch).
commit;
raiserror (N'Done. %d parts', 0,1, @cnt) with nowait;
go

-- populate Finishes
-- Inserts FINISHESCOUNT rows described as 'Finish <n>' (n counts from 0).
-- Work is committed every 1000 rows, so each transaction holds at most
-- 1000 inserts.
declare @cnt int = 0, @description varchar(256);
begin transaction;
while @cnt < $(FINISHESCOUNT)
begin
    set @description = 'Finish ' + cast(@cnt as varchar(10));
    insert into Finishes (Description) values (@description);
    set @cnt += 1;
    if @cnt % 1000 = 0
    begin
        commit;
        -- Severity 0 makes this an informational message; with nowait sends
        -- it to the client immediately instead of leaving it buffered, so
        -- the progress is visible while the loop is still running.
        raiserror (N'Inserted %d finishes', 0,1, @cnt) with nowait;
        begin transaction;
    end
end
-- Commit first, then report completion, matching the other population
-- batches: "Done" is only printed once the data is durable.
commit;
raiserror (N'Done. %d finishes', 0,1, @cnt) with nowait;
go

-- populate product parts
-- For every Product_Id from 0 up to PRODUCTSCOUNT - 1, links a random number
-- of parts (between 1 and PRODUCTSPARTS, inclusive) to the product.
-- Part ids are chosen by a forward walk: start at a random Part_Id, then
-- step ahead by 1..10 for each further part, wrapping to a low id when the
-- walk passes the last part. Each step moves strictly forward, so within one
-- product no Part_Id repeats as long as the walk cannot catch up with its
-- own start after wrapping (at most 50 steps of at most 10 over 5000 parts
-- with the configured values).
-- Work is committed every 1000 inserted rows.
declare @cnt int = 0, @parts int = 0, @part int, @product int = 0;
begin transaction;
while @product < $(PRODUCTSPARTS) * 0 + $(PRODUCTSCOUNT)
begin
    -- rand() is in [0, 1), so floor(rand() * N) + 1 is uniform over 1..N.
    -- The previous form, rand() * (N - 1) + 1 truncated to int, could only
    -- produce 1..N-1 and never reached the configured maximum.
    set @parts = floor(rand() * $(PRODUCTSPARTS)) + 1;
    -- Random starting part; float-to-int assignment truncates, giving
    -- 0..PARTSCOUNT-1, which matches the 0-based identity values of Parts.
    set @part = rand() * $(PARTSCOUNT);
    while 0 < @parts
    begin
        insert into ProductParts (Product_Id, Part_Id)
            values (@product, @part);
        set @parts -= 1;
        -- Advance by 1..10 so the next part differs from the current one.
        set @part += rand()*10+1;
        -- Past the last part: restart the walk at a low id (0..9).
        if @part >= $(PARTSCOUNT)
            set @part = rand()*10;
        set @cnt += 1;
        if @cnt % 1000 = 0
        begin
            commit;
            -- with nowait sends the informational message immediately, so
            -- progress is visible while the loop is still running.
            raiserror (N'Inserted %d product-parts', 0,1, @cnt) with nowait;
            begin transaction;
        end
    end
    set @product += 1;
end
commit;
raiserror (N'Done. %d product-parts', 0,1, @cnt) with nowait;
go

-- populate part finishes
-- For every Part_Id from 0 up to PARTSCOUNT - 1, links a random number of
-- finishes (between 1 and PARTFINISHES, inclusive) to the part.
-- Finish ids are chosen by a forward walk: start at a random Finish_Id, then
-- step ahead by 1..10 for each further finish, wrapping to a low id when the
-- walk passes the last finish.
-- NOTE(review): if PARTFINISHES is raised far enough that the walk can wrap
-- and catch up with ids it already used (roughly PARTFINISHES * 10 exceeding
-- FINISHESCOUNT), an insert could repeat a (Part_Id, Finish_Id) pair and
-- violate pk_part_finishes. With the configured value of 1 this cannot
-- happen.
-- Work is committed every 1000 inserted rows.
declare @cnt int = 0, @part int = 0, @finish int, @finishes int;
begin transaction;
while @part < $(PARTSCOUNT)
begin
    -- rand() is in [0, 1), so floor(rand() * N) + 1 is uniform over 1..N.
    -- The previous form, rand() * (N - 1) + 1 truncated to int, could only
    -- produce 1..N-1 for N > 1. For N = 1 both forms yield exactly 1.
    set @finishes = floor(rand() * $(PARTFINISHES)) + 1;
    -- Random starting finish; float-to-int assignment truncates, giving
    -- 0..FINISHESCOUNT-1, which matches the 0-based identity values of
    -- Finishes.
    set @finish = rand() * $(FINISHESCOUNT);
    while 0 < @finishes
    begin
        insert into PartFinishes (Part_Id, Finish_Id)
            values (@part, @finish);
        -- Advance by 1..10 so the next finish differs from the current one.
        set @finish += rand()*10+1;
        -- Past the last finish: restart the walk at a low id (1..10).
        if @finish >= $(FINISHESCOUNT)
            set @finish = rand()*10+1;
        set @finishes -= 1;
        set @cnt += 1;
        if @cnt % 1000 = 0
        begin
            commit;
            -- with nowait sends the informational message immediately, so
            -- progress is visible while the loop is still running.
            raiserror (N'Inserted %d part-finishes', 0,1, @cnt) with nowait;
            begin transaction;
        end
    end
    set @part += 1;
end
commit;
raiserror (N'done. %d part-finishes', 0,1, @cnt) with nowait;
go
Run Code Online (Sandbox Code Playgroud)

现在,如果我们运行一个基本测试,结果非常好:

-- Basic read test: fetch one random product with all of its parts and every
-- finish allowed for each part, reporting timing and I/O statistics.
set statistics time on;
set statistics io on;

-- Random product id. 30000 is the number of products generated by the
-- population script; float-to-int assignment truncates, giving 0..29999,
-- which matches the 0-based identity values of Products.
declare @product int = rand()*30000;

-- Explicit, aliased column list: select * over these five tables returns
-- Product_Id twice, Part_Id three times, Finish_Id twice and three columns
-- all named Description, which a client cannot tell apart by name.
select
    po.Product_Id,
    po.Description as Product_Description,
    pa.Part_Id,
    pa.Description as Part_Description,
    f.Finish_Id,
    f.Description  as Finish_Description
from Products po
inner join ProductParts pp on po.Product_Id = pp.Product_Id
inner join Parts pa on pa.Part_Id = pp.Part_Id
inner join PartFinishes pf on pf.Part_Id = pa.Part_Id
inner join Finishes f on pf.Finish_id = f.Finish_Id
where po.Product_Id = @product;
Run Code Online (Sandbox Code Playgroud)

执行时间:

(33 row(s) affected)
Table 'Finishes'. Scan count 0, logical reads 66, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Parts'. Scan count 0, logical reads 66, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'PartFinishes'. Scan count 33, logical reads 66, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'ProductParts'. Scan count 1, logical reads 3, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Products'. Scan count 0, logical reads 2, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

 SQL Server Execution Times:
   CPU time = 0 ms,  elapsed time = 5 ms.
Run Code Online (Sandbox Code Playgroud)

对于一个随机产品,执行时间是5ms.而且这远远算不上"服务器",我是在笔记本电脑上运行的.这并不意外:所有访问都被这些表上的聚集索引覆盖了.500个用户的压力测试就留给你自己去搭建,并亲自测量它在并发下的表现.我预计它能很好地扛住.