SQL INSERT但避免重复

Rav*_*avi 11 sql sql-server sql-server-2005

我想做一些快速插入,但避免重复到表中.为了论证,我们称之为MarketPrices,我一直在尝试两种方法,但不确定如何更快地进行基准测试.

INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT @SecurityCode, @BuyPrice,  @SellPrice, @IsMarketOpen
EXCEPT
SELECT SecurityCode, BuyPrice, SellPrice, j.bool as IsActive FROM MarketPrices
CROSS JOIN (SELECT 0 as bool UNION SELECT 1 as bool ) as j
Run Code Online (Sandbox Code Playgroud)

要么

DECLARE @MktId int
SET @MktId = (SELECT SecurityId FROM MarketPrices 
              where SecurityCode = @SecurityCode 
              and BuyPrice=@BuyPrice 
              and SellPrice = @SellPrice)

IF (@MktId is NULL)  
BEGIN
    INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
    VALUES
    (@SecurityCode,@BuyPrice, @SellPrice, @IsMarketOpen)
END
Run Code Online (Sandbox Code Playgroud)

假设这@whatever是存储过程中的输入参数.

我希望能够在BuyPrice或SellPrice或两者与之前的其他所有事件不同时为每个SecurityCode插入新记录.我不关心IsMarketOpen.

关于上述任何一种方法,有什么明显的愚蠢之处吗?一个比另一个快吗?

Pet*_*hia 11

编辑:为防止并发环境中的竞争条件,请WITH (UPDLOCK)在相关子查询或EXCEPT'd中使用SELECT.我在下面编写的测试脚本不需要它,因为它使用仅对当前连接可见的临时表,但在真实环境中,对用户表进行操作时,它是必要的.

MERGE不需要UPDLOCK.


受mcl的回答启发:独特索引并让数据库抛出错误,我决定对条件插入try/catch进行基准测试.

结果似乎支持try/catch上的条件插入,但支持YMMV.这是一个非常简单的场景(一列,小桌子等),在一台机器上执行,等等.

以下是结果(SQL Server 2008,版本10.0.1600.2):

duplicates (short table)    
  try/catch:                14440 milliseconds / 100000 inserts
  conditional insert:        2983 milliseconds / 100000 inserts
  except:                    2966 milliseconds / 100000 inserts
  merge:                     2983 milliseconds / 100000 inserts

uniques
  try/catch:                 3920 milliseconds / 100000 inserts
  conditional insert:        3860 milliseconds / 100000 inserts
  except:                    3873 milliseconds / 100000 inserts
  merge:                     3890 milliseconds / 100000 inserts

  straight insert:           3173 milliseconds / 100000 inserts

duplicates (tall table)
  try/catch:                14436 milliseconds / 100000 inserts
  conditional insert:        3063 milliseconds / 100000 inserts
  except:                    3063 milliseconds / 100000 inserts
  merge:                     3030 milliseconds / 100000 inserts
Run Code Online (Sandbox Code Playgroud)

请注意,即使在唯一插入上,尝试/捕获的开销也比条件插入稍微多一些.我想知道这是否因版本,CPU,内核数量等而异.

我没有对IF条件插入进行基准测试WHERE.我假设IF变量会显示更多开销,因为a)你有两个语句,并且b)你需要将两个语句包装在一个事务中并将隔离级别设置为serializable(!).如果有人想要对此进行测试,则需要将临时表更改为常规用户表(可序列化不适用于本地临时表).

这是脚本:

-- tested on SQL 2008.
-- to run on SQL 2005, comment out the statements using MERGE
set nocount on

if object_id('tempdb..#temp') is not null drop table #temp
create table #temp (col1 int primary key)
go

-------------------------------------------------------

-- duplicate insert test against a table w/ 1 record

-------------------------------------------------------

insert #temp values (1)
go

declare @x int, @y int, @now datetime, @duration int
select @x = 1, @y = 0, @now = getdate()
while @y < 100000 begin
  set @y = @y+1
  begin try 
    insert #temp select @x
  end try
  begin catch end catch
end
set @duration = datediff(ms,@now,getdate())
raiserror('duplicates (short table), try/catch: %i milliseconds / %i inserts',-1,-1,@duration,@y) with nowait
go

declare @x int, @y int, @now datetime, @duration int
select @x = 1, @y = 0, @now = getdate()
while @y < 100000 begin
  set @y = @y+1
  insert #temp select @x where not exists (select * from #temp where col1 = @x)
end
set @duration = datediff(ms,@now,getdate())
raiserror('duplicates (short table), conditional insert: %i milliseconds / %i inserts',-1,-1,@duration, @y) with nowait
go

declare @x int, @y int, @now datetime, @duration int
select @x = 1, @y = 0, @now = getdate()
while @y < 100000 begin
  set @y = @y+1
  insert #temp select @x except select col1 from #temp
end
set @duration = datediff(ms,@now,getdate())
raiserror('duplicates (short table), except: %i milliseconds / %i inserts',-1,-1,@duration, @y) with nowait
go

-- comment this batch out for SQL 2005
declare @x int, @y int, @now datetime, @duration int
select @x = 1, @y = 0, @now = getdate()
while @y < 100000 begin
  set @y = @y+1
  merge #temp t using (select @x) s (col1) on t.col1 = s.col1 when not matched by target then insert values (col1);
end
set @duration = datediff(ms,@now,getdate())
raiserror('duplicates (short table), merge: %i milliseconds / %i inserts',-1,-1,@duration, @y) with nowait
go

-------------------------------------------------------

-- unique insert test against an initially empty table

-------------------------------------------------------

truncate table #temp
declare @x int, @now datetime, @duration int
select @x = 0, @now = getdate()
while @x < 100000 begin
  set @x = @x+1
  insert #temp select @x
end
set @duration = datediff(ms,@now,getdate())
raiserror('uniques, straight insert: %i milliseconds / %i inserts',-1,-1,@duration, @x) with nowait
go

truncate table #temp
declare @x int, @now datetime, @duration int
select @x = 0, @now = getdate()
while @x < 100000 begin
  set @x = @x+1
  begin try 
    insert #temp select @x
  end try
  begin catch end catch
end
set @duration = datediff(ms,@now,getdate())
raiserror('uniques, try/catch: %i milliseconds / %i inserts',-1,-1,@duration, @x) with nowait
go

truncate table #temp
declare @x int, @now datetime, @duration int
select @x = 0, @now = getdate()
while @x < 100000 begin
  set @x = @x+1
  insert #temp select @x where not exists (select * from #temp where col1 = @x)
end
set @duration = datediff(ms,@now,getdate())
raiserror('uniques, conditional insert: %i milliseconds / %i inserts',-1,-1,@duration, @x) with nowait
go

truncate table #temp
declare @x int, @now datetime, @duration int
select @x = 0, @now = getdate()
while @x < 100000 begin
  set @x = @x+1
  insert #temp select @x except select col1 from #temp
end
set @duration = datediff(ms,@now,getdate())
raiserror('uniques, except: %i milliseconds / %i inserts',-1,-1,@duration, @x) with nowait
go

-- comment this batch out for SQL 2005
truncate table #temp
declare @x int, @now datetime, @duration int
select @x = 1, @now = getdate()
while @x < 100000 begin
  set @x = @x+1
  merge #temp t using (select @x) s (col1) on t.col1 = s.col1 when not matched by target then insert values (col1);
end
set @duration = datediff(ms,@now,getdate())
raiserror('uniques, merge: %i milliseconds / %i inserts',-1,-1,@duration, @x) with nowait
go

-------------------------------------------------------

-- duplicate insert test against a table w/ 100000 records

-------------------------------------------------------

declare @x int, @y int, @now datetime, @duration int
select @x = 1, @y = 0, @now = getdate()
while @y < 100000 begin
  set @y = @y+1
  begin try 
    insert #temp select @x
  end try
  begin catch end catch
end
set @duration = datediff(ms,@now,getdate())
raiserror('duplicates (tall table), try/catch: %i milliseconds / %i inserts',-1,-1,@duration,@y) with nowait
go

declare @x int, @y int, @now datetime, @duration int
select @x = 1, @y = 0, @now = getdate()
while @y < 100000 begin
  set @y = @y+1
  insert #temp select @x where not exists (select * from #temp where col1 = @x)
end
set @duration = datediff(ms,@now,getdate())
raiserror('duplicates (tall table), conditional insert: %i milliseconds / %i inserts',-1,-1,@duration, @y) with nowait
go

declare @x int, @y int, @now datetime, @duration int
select @x = 1, @y = 0, @now = getdate()
while @y < 100000 begin
  set @y = @y+1
  insert #temp select @x except select col1 from #temp
end
set @duration = datediff(ms,@now,getdate())
raiserror('duplicates (tall table), except: %i milliseconds / %i inserts',-1,-1,@duration, @y) with nowait
go

-- comment this batch out for SQL 2005
declare @x int, @y int, @now datetime, @duration int
select @x = 1, @y = 0, @now = getdate()
while @y < 100000 begin
  set @y = @y+1
  merge #temp t using (select @x) s (col1) on t.col1 = s.col1 when not matched by target then insert values (col1);
end
set @duration = datediff(ms,@now,getdate())
raiserror('duplicates (tall table), merge: %i milliseconds / %i inserts',-1,-1,@duration, @y) with nowait
go
Run Code Online (Sandbox Code Playgroud)


Pet*_*hia 6

编辑:为防止并发环境中的竞争条件,请WITH (UPDLOCK)在相关子查询中使用.


我认为这将是标准方法:

INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT @SecurityCode, @BuyPrice,  @SellPrice, @IsMarketOpen
WHERE NOT EXISTS (
  SELECT * FROM MarketPrices WITH (UPDLOCK)
  WHERE SecurityCode = @SecurityCode 
    AND BuyPrice = @BuyPrice 
    AND SellPrice = @SellPrice
  )
Run Code Online (Sandbox Code Playgroud)

如果您的任何字段可以为空,则必须将其添加到条件中.

你的第一种方法很有趣,但是对EXCEPT的要求让你跳过了箍.这种方法基本相同,但它可以解决列匹配问题.

或者:

INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT SecurityCode, BuyPrice, SellPrice, @IsMarketOpen
FROM (
  SELECT @SecurityCode, @BuyPrice,  @SellPrice
  EXCEPT
  SELECT SecurityCode, BuyPrice, SellPrice FROM MarketPrices WITH (UPDLOCK)
  ) a (SecurityCode, BuyPrice, SellPrice)
Run Code Online (Sandbox Code Playgroud)

在这个实例中,EXCEPT的好处是它可以处理NULL而无需您进行任何额外的编码.要在第一个示例中实现相同的功能,您需要测试每个对的NULL以及相等的长手.

你的第二种方法没问题,但你不需要变量.看到Tomalak的解决方案,他很好地清理了它.此外,如果这是一个问题,您需要显式处理并发插入的可能性.