生成并插入 100 万行到简单表中

Lua*_*ynh 8 sql-server

描述:

我尝试在 MSSQL 2012 Express 上将 100 万行插入到空表中。这是我的脚本:

-- set statistics time off

-- Reset the target table before each timing run.
-- SQL Server 2012 has no DROP TABLE IF EXISTS, so the drop is guarded with
-- OBJECT_ID; an unguarded drop raises an error when t1 does not exist yet.
if object_id(N't1', N'U') is not null
    drop table t1;

-- text is a deprecated data type; varchar(max) is its documented replacement
-- and accepts the same inserts the scripts below perform.
create table t1 (id int, a varchar(max), b varchar(max));
go

-- #1 - 1,000,000 rows - 30s -> 45s
-- Recursive CTE that counts from 1 to 1,000,000 and inserts one row per number.
with ID(number) as
(
    select 1 as number
    union all
    select number + 1
    from ID
    -- The row with number = 999,999 is the last one allowed to recurse, so the
    -- final value produced is exactly 1,000,000 (the previous bound of
    -- 1000000 + 1 produced 1,000,001 rows).
    where number < 1000000
)
insert into t1 (id, a, b)
    select
        number,
        'a_' + cast(number as varchar(11)),      -- 11 chars cover any int, sign included
        'b_' + cast(number / 2 as varchar(11))   -- integer division
    from ID
    -- 0 removes the default recursion limit of 100 levels
    option (maxrecursion 0);


-- #2 - 1 million rows, inserted one row at a time.
-- As originally measured (each insert autocommitted on its own): ~140,000 rows
-- in 120s, and the query had to be cancelled.
declare @count int;
set @count = 0;

-- Suppress the "1 row affected" message that would otherwise be sent per insert.
set nocount on;

-- Run the whole loop inside one explicit transaction so the inserts are
-- committed once at the end instead of once per row.
-- NOTE: if the batch is cancelled midway the transaction stays open and must
-- be rolled back manually.
begin transaction;

while @count < 1000000
begin
    set @count = @count + 1;

    insert into t1 (id, a, b)
        values (
            @count,
            'a_' + cast(@count as varchar(11)),
            'b_' + cast(@count / 2 as varchar(11))   -- integer division
        );
end;

commit transaction;

-- Restore the default message behaviour. The trailing semicolon also lets a
-- following WITH (CTE) statement parse when it is in the same batch.
set nocount off;

-- #3 - ~1,300,000 rows - 18s -> 20s

-- Seed step: number every pair of sys.all_columns rows that share an object_id
-- and insert one row per pair. The row count therefore depends on the catalog
-- contents of the database the script runs in.
with numbered as
(
    select row_number() over (order by lhs.object_id) as rn
    from sys.all_columns as lhs
    inner join sys.all_columns as rhs
        on lhs.object_id = rhs.object_id
)
insert into t1 (id, a, b)
    select
        rn,
        'a_' + cast(rn as varchar(30)),      -- 30 is the length CAST uses when none is given
        'b_' + cast(rn / 2 as varchar(30))
    from numbered;
go

-- Growth step: three passes. Each pass appends one new row per existing row,
-- so the table doubles every time. Every row added in a pass carries the same
-- id, max(id) + 1 as computed by that pass's statement.
declare @pass int = 0;

while @pass < 3
begin
    with next_id as
    (
        select max(id) + 1 as new_id
        from t1
    )
    insert into t1 (id, a, b)
        select
            new_id,
            'a_' + cast(new_id as varchar(30)),
            'b_' + cast(new_id / 2 as varchar(30))
        from t1
        cross join next_id;

    set @pass += 1;
end;

-- #4 -- 1,000,000 = 3s -> 4s (have to drop t1 first)
-- hundred: the integers 1..100, built with 99 recursion steps.
-- numbered: hundred x hundred x hundred = 1,000,000 rows, numbered sequentially.
with hundred(n) as
(
    select 1 as n
    union all
    select n + 1
    from hundred
    where n < 100
),
numbered as
(
    select row_number() over (order by h1.n) as rn
    from hundred as h1
    cross join hundred as h2
    cross join hundred as h3
)
-- SELECT ... INTO creates t1 itself, which is why the table must not exist yet.
select
    rn as id,
    'a_' + cast(rn as varchar(30)) as a,       -- 30 is the length CAST uses when none is given
    'b_' + cast(rn / 2 as varchar(30)) as b
into t1
from numbered;
Run Code Online (Sandbox Code Playgroud)

问题:

经过研究,我找到了 4 种解决方案。有没有更好的解决方案(不采用从文件导入数据的方式)?

dno*_*eth 18

Itzik Ben-Gan 使用的是下面这种方法。这很可能是他所找到的最快的方法,而他非常聪明 :-)

-- Stacked cross joins: each level cross-joins the previous level with itself,
-- squaring its row count. The rows are then numbered and only the first
-- 1,000,000 numbers are kept. SELECT ... INTO creates t1.
WITH
  E1  AS (SELECT c FROM (VALUES (1), (1)) AS D(c)),                  -- 2^1 rows
  E2  AS (SELECT 1 AS c FROM E1  AS lhs CROSS JOIN E1  AS rhs),      -- 2^2 rows
  E4  AS (SELECT 1 AS c FROM E2  AS lhs CROSS JOIN E2  AS rhs),      -- 2^4 rows
  E8  AS (SELECT 1 AS c FROM E4  AS lhs CROSS JOIN E4  AS rhs),      -- 2^8 rows
  E16 AS (SELECT 1 AS c FROM E8  AS lhs CROSS JOIN E8  AS rhs),      -- 2^16 rows
  E32 AS (SELECT 1 AS c FROM E16 AS lhs CROSS JOIN E16 AS rhs),      -- 2^32 rows
  -- ORDER BY (SELECT NULL): no particular ordering is requested for the numbering
  Seq AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS n FROM E32)
SELECT
    n AS id,
    'a_' + CAST(n AS varchar(30)) AS a,       -- 30 is the length CAST uses when none is given
    'b_' + CAST(n / 2 AS varchar(30)) AS b
INTO t1
FROM Seq
WHERE n <= 1000000;
Run Code Online (Sandbox Code Playgroud)


Pau*_*ite 10

dnoeth答案的变体:

-- Create dbo.T1 with 1,000,000 rows holding only an IDENTITY id column, then
-- expose a and b as computed (non-persisted) columns derived from id.
WITH Ten(N) AS
(
    -- ten rows; the value itself is never used
    SELECT V.N
    FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS V(N)
)
SELECT
    id = IDENTITY(int, 1, 1)
INTO dbo.T1
FROM Ten AS T10
CROSS JOIN Ten AS T100
CROSS JOIN Ten AS T1000
CROSS JOIN Ten AS T10000
CROSS JOIN Ten AS T100000
CROSS JOIN Ten AS T1000000;   -- 10^6 rows in total

-- The 'a_' / 'b_' prefixes make the values match the data the question's
-- scripts generate ('a_1', 'b_0', ...); varchar(11) covers any int, sign included.
ALTER TABLE dbo.T1
ADD a AS ('a_' + CONVERT(varchar(11), id));

ALTER TABLE dbo.T1
ADD b AS ('b_' + CONVERT(varchar(11), id / 2));   -- integer division
Run Code Online (Sandbox Code Playgroud)

这避免了存储 a 和 b 的值;它们的值将根据需要在运行时计算。这可能有点作弊,但它确实有优点:

  • 没有用于列 a 和 b 的存储空间
  • id 列直接定义为 integer 类型(未压缩时占 4 个字节);而 ROW_NUMBER 返回的是 bigint(未压缩时占 8 个字节)。
  • id 列被赋予了标识(IDENTITY)属性,因此它不可更新。

或者,将所有列存储在表中:

-- Create dbo.T1 with 1,000,000 rows and store id, a and b as real columns.
WITH Ten(N) AS
(
    -- ten rows; the value itself is never used
    SELECT V.N
    FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS V(N)
)
SELECT
    id = Numbered.rn,
    -- The 'a_' / 'b_' prefixes make the values match the data the question's
    -- scripts generate; varchar(11) covers any int, sign included.
    a = 'a_' + CONVERT(varchar(11), Numbered.rn),
    b = 'b_' + CONVERT(varchar(11), Numbered.rn / 2)   -- integer division
INTO dbo.T1
FROM
(
    -- Number the 10^6 rows once; ROW_NUMBER returns bigint, so convert it to
    -- integer to keep id a 4-byte column.
    SELECT rn = CONVERT(integer, ROW_NUMBER() OVER (ORDER BY T10.N))
    FROM Ten AS T10
    CROSS JOIN Ten AS T100
    CROSS JOIN Ten AS T1000
    CROSS JOIN Ten AS T10000
    CROSS JOIN Ten AS T100000
    CROSS JOIN Ten AS T1000000
) AS Numbered;
Run Code Online (Sandbox Code Playgroud)

请注意 id 列上到 integer 的显式转换,以及为 varchar 类型指定了明确的长度。参见:

应当改掉的坏习惯:声明 VARCHAR 时不指定(长度)——作者 Aaron Bertrand