Jon*_*ssi 4 database-design normalization dynamic-sql sql-server-2008 non-clustered-index
我有一个崭露头角的开发人员,他非常热衷于他称之为"矩阵"的东西
我正在寻找同行的洞察力
简而言之,这就是我们所拥有的:
- 1个高度非规范化的表,大约120列
- 数据点包括帐户,客户,家庭,关系,产品,员工等...
- 每列一个索引:大约120个非聚集索引
-今天索引使用的数据库中大约90%的空间都是此表的索引
- 今天大约有150万行有很多空
- 表加载了一个存储过程,其核心是动态SQL
- 所有字段名都是通用的,不是描述数据
- 数据字典类型表与动态SQL一起使用以将任何数据点加载到任何字段
- 字段映射不是静态的:今天列dim_0001是客户名称,但明天可能是别的
- 没有主键
- 没有外键
- 没有实际约束(例如,所有字段都可以为空)
该表的参数:
- 使编写查询更简单,因为它消除了编写某些连接的需要
预期用途:
- 最终用户层,将成为Business Objects中Universe构建的核心组件
- 后ETL过程开发
我的建议要么杀死今天的进程(在测试环境中进行早期开发),要么将其移至测试的下一步.
根据我所做的研究,我的教育和经验,我不支持它,并希望一旦依赖这些表的一个或两个进程迁移到另一个解决方案,表就会被删除.
下面的脚本供您参考(我仅限于一个索引示例).
您可以提供的任何见解(甚至只是一个单词的意见)都是有价值的
-- The Matrix
CREATE TABLE [z005497].[tblMatrix](
[as_of_dt] [datetime] NOT NULL,
[dim_0001] [varchar](100) NULL,
[dim_0002] [varchar](103) NULL,
[dim_0003] [varchar](100) NULL,
[dim_0004] [varchar](100) NULL,
[dim_0005] [varchar](100) NULL,
[dim_0006] [varchar](100) NULL,
[dim_0007] [varchar](100) NULL,
[dim_0008] [varchar](100) NULL,
[dim_0009] [varchar](100) NULL,
[dim_0010] [varchar](100) NULL,
[dim_0011] [varchar](100) NULL,
[dim_0012] [varchar](100) NULL,
[dim_0013] [varchar](100) NULL,
[dim_0014] [varchar](100) NULL,
[dim_0015] [varchar](100) NULL,
[dim_0016] [varchar](100) NULL,
[dim_0017] [varchar](103) NULL,
[dim_0018] [varchar](103) NULL,
[dim_0019] [varchar](103) NULL,
[dim_0020] [varchar](103) NULL,
[dim_0021] [varchar](103) NULL,
[dim_0022] [varchar](103) NULL,
[dim_0023] [varchar](103) NULL,
[dim_0024] [varchar](103) NULL,
[dim_0025] [varchar](103) NULL,
[dim_0026] [varchar](11) NULL,
[dim_0027] [varchar](11) NULL,
[dim_0028] [varchar](11) NULL,
[dim_0029] [varchar](11) NULL,
[dim_0030] [varchar](11) NULL,
[dim_0031] [varchar](11) NULL,
[dim_0032] [varchar](11) NULL,
[dim_0033] [varchar](11) NULL,
[dim_0034] [varchar](11) NULL,
[dim_0035] [varchar](11) NULL,
[dim_0036] [varchar](11) NULL,
[dim_0037] [varchar](11) NULL,
[dim_0038] [varchar](11) NULL,
[dim_0039] [varchar](11) NULL,
[dim_0040] [varchar](11) NULL,
[dim_0041] [varchar](11) NULL,
[dim_0042] [varchar](11) NULL,
[dim_0043] [varchar](11) NULL,
[dim_0044] [varchar](11) NULL,
[dim_0045] [varchar](11) NULL,
[dim_0046] [varchar](11) NULL,
[dim_0047] [varchar](11) NULL,
[dim_0048] [varchar](11) NULL,
[dim_0049] [varchar](11) NULL,
[dim_0050] [varchar](11) NULL,
[dim_0051] [varchar](11) NULL,
[dim_0052] [varchar](11) NULL,
[dim_0053] [varchar](11) NULL,
[dim_0054] [varchar](5) NULL,
[dim_0055] [varchar](5) NULL,
[dim_0056] [varchar](5) NULL,
[dim_0057] [varchar](5) NULL,
[dim_0058] [varchar](5) NULL,
[dim_0059] [varchar](5) NULL,
[dim_0060] [varchar](5) NULL,
[dim_0061] [varchar](5) NULL,
[dim_0062] [varchar](5) NULL,
[dim_0063] [varchar](5) NULL,
[dim_0064] [varchar](5) NULL,
[dim_0065] [varchar](5) NULL,
[dim_0066] [varchar](5) NULL,
[dim_0067] [varchar](5) NULL,
[dim_0068] [varchar](5) NULL,
[dim_0069] [varchar](5) NULL,
[dim_0070] [varchar](5) NULL,
[dim_0071] [varchar](5) NULL,
[dim_0072] [varchar](5) NULL,
[dim_0073] [varchar](5) NULL,
[dim_0074] [varchar](5) NULL,
[dim_0075] [varchar](5) NULL,
[dim_0076] [varchar](5) NULL,
[dim_0077] [varchar](5) NULL,
[dim_0078] [varchar](5) NULL,
[dim_0079] [varchar](5) NULL,
[dim_0080] [varchar](5) NULL,
[dim_0081] [varchar](5) NULL,
[dim_0082] [varchar](5) NULL,
[dim_0083] [varchar](5) NULL,
[dim_0084] [int] NULL,
[dim_0085] [int] NULL,
[dim_0086] [int] NULL,
[dim_0087] [int] NULL,
[dim_0088] [int] NULL,
[dim_0089] [int] NULL,
[dim_0090] [int] NULL,
[dim_0091] [int] NULL,
[dim_0092] [int] NULL,
[dim_0093] [int] NULL,
[dim_0094] [varchar](12) NULL,
[dim_0095] [varchar](12) NULL,
[dim_0096] [varchar](12) NULL,
[dim_0097] [varchar](120) NULL,
[dim_0098] [varchar](120) NULL,
[dim_0099] [varchar](120) NULL,
[dim_0100] [numeric](20, 0) NULL,
[dim_0101] [varchar](20) NULL,
[dim_0102] [varchar](20) NULL,
[dim_0103] [varchar](20) NULL,
[dim_0104] [varchar](20) NULL,
[dim_0105] [varchar](20) NULL,
[dim_0106] [varchar](20) NULL,
[dim_0107] [varchar](20) NULL,
[dim_0108] [varchar](20) NULL,
[dim_0109] [varchar](20) NULL,
[dim_0110] [varchar](20) NULL,
[dim_0111] [varchar](20) NULL,
[dim_0112] [varchar](20) NULL,
[dim_0113] [varchar](20) NULL,
[dim_0114] [varchar](20) NULL,
[dim_0115] [varchar](20) NULL,
[dim_0116] [varchar](20) NULL,
[dim_0117] [varchar](20) NULL,
[dim_0118] [varchar](20) NULL,
[dim_0119] [varchar](20) NULL,
[dim_0120] [varchar](20) NULL,
[lastLoad] [datetime] NULL
) ON [PRIMARY]
-- Index example
CREATE NONCLUSTERED INDEX [idx_dim_0001 (not unique)] ON [z005497].[tblMatrix]
(
[dim_0001] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
-- The configuration table from which developers would find out what is in the Matrix
CREATE TABLE [z005497].[tblMatrixCfg](
[dimId] [int] IDENTITY(100000,1) NOT NULL,
[colName] [varchar](25) NOT NULL,
[dataType] [varchar](25) NOT NULL,
[dimName] [varchar](25) NOT NULL,
[dimDesc] [varchar](500) NOT NULL,
[dimpath] [varchar](5000) NOT NULL,
[loadDate] [datetime] NOT NULL,
[modUser] [varchar](100) NOT NULL,
[modDate] [datetime] NOT NULL,
CONSTRAINT [PK_tblMatrixCfg_1] PRIMARY KEY CLUSTERED
(
[dimId] ASC,
[colName] ASC,
[dimName] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
Run Code Online (Sandbox Code Playgroud)
如果可以,请杀死它.
此外,该开发人员需要更多的经验.他/她应该在另一家公司买到它.
它基本上违反了很多我不知道从哪里开始的事情.
即使你最终打击了一个高度规范化的模型,这种模式是盲目地追随某人的最佳实践,但它无法与这个设计将要创造的灾难进行比较.
举一个例子来说明Cade的意思是"我不知道从哪里开始":
"今天列dim_0001是客户名称,但明天可能还有别的东西"
这通常也意味着在用户接受系统中,dim_0001可以是客户名称(系统似乎可以工作并被接受),然后您转到生产,dim_0001将成为总统的妻子左右的名字,并且然后需要花费数小时的会议来试图弄清楚(a)问题出在哪里,以及(b)如何在尽可能短的时间内解决问题.
((b)通常相当于用"if col_name = dim_0001"之类的东西修补代码,然后不要将其视为矩阵所说的内容,而是将其视为硬编码的内容而不是"."