一个处理百万级数据量的存储过程，请牛人来帮忙优化一下，谢谢。

hui717 2008-05-09 03:06:31





CREATE PROCEDURE [dbo].[CheckDataDistinct]

@tableName nvarchar(30),  --表名

@sumCount int,  --码的个数

@ilength_fw int, --防伪码的长度

@ilength_wl int, --物流码的长度

@scode nvarchar(10)  --码的关键字

AS

declare @strSql nvarchar(300)  --存储准备执行的SQL语句 

declare @tableCount int

declare @tmpCode_fw nvarchar(30)  --临时存放新生成的码

declare @tmpCode_wl nvarchar(30)  --临时存放新生成的码



    --从大到小处理物流码

    Create Table  #MyTmpeTable(id int,code_fw nvarchar(30),code_wl nvarchar(30),

                        flag_wl int, select_Sum_Count int)

    Set @strSql = '

    Insert  Into #MyTmpeTable

       Select a.*

          from '+ @tableName + ' a where

            id = (select min(id) from '+@tableName+' where code_wl = a.code_wl) order by a.id'

    exec(@strSql)--去掉物流码的所有重复值

    Select  @tableCount = Count(id) From #MyTmpeTable  --去掉重复值后还剩于的码个数

    

    while @tableCount < @sumCount  --补齐码的个数

    begin

       Set @tmpCode_fw = dbo.Code(@ilength_fw,@scode,@tableCount)  --生成新码

       Set @tmpCode_wl = dbo.Code(@ilength_wl,@scode,@tableCount)  --生成新码

       Insert into #MyTmpeTable(id,code_fw,code_wl,flag_wl,select_Sum_Count)

         Values(@tableCount,@tmpCode_fw,@tmpCode_wl,0,0)

       Set @tableCount = @tableCount+1

    end

    

    --从小到大处理防伪码

    Create Table  #MyTable_fw(id int,code_fw nvarchar(30),code_wl nvarchar(30),

                        flag_wl int, select_Sum_Count int)

    Set @strSql = '

    Insert  Into #MyTable_fw

       Select a.*

          from #MyTmpeTable a where

            id = (select min(id) from #MyTmpeTable where code_fw = a.code_fw) order by a.id'

    exec(@strSql)--去掉防伪码的所有重复值

    Select  @tableCount = Count(id) From #MyTable_fw  --去掉重复值后还剩于的码个数

    Set @tableCount = @sumCount - @tableCount

    while @tableCount   > 0   --补齐码的个数

    begin

       Set @tmpCode_fw = dbo.Code(@ilength_fw,@scode,@tableCount)  --生成新码

       Set @tmpCode_wl = dbo.Code(@ilength_wl,@scode,@tableCount)  --生成新码

       Insert into #MyTable_fw(id,code_fw,code_wl,flag_wl,select_Sum_Count)

         Values(@tableCount,@tmpCode_fw,@tmpCode_wl,0,0)

       Set @tableCount = @tableCount - 1

    end

    

    --把处理后的码，写回原表中

    

    Set @strSql = 'delete  '+@tableName

    exec(@strSql)

    

    Set @strSql = 'Insert into '+@tableName+' (code_fw,code_wl,flag_wl,select_Sum_Count '+

                      ') Select code_fw,code_wl,flag_wl,select_Sum_Count From #MyTable_fw '

    exec(@strSql)

GO



CREATE FUNCTION [dbo].[Code]  (@ilength int,@scode nvarchar(30),@inum int) 

RETURNS   nvarchar(50)

BEGIN 

--变量

declare @alength int

declare @strTmp nvarchar(50)  --新生成的码，返回用



set @alength = @ilength - 6  --需要补齐的位数

Set @strTmp = @inum  --码的关键字



if len(@strTmp) > @alength

begin  

   Set @strTmp = substring(@strTmp,1,@ilength)

end

else

begin

  declare @ailength int

  set @ailength = @alength - len(@strTmp)  --需要循环处理的次数

  declare @j int

  set @j = 0

  

  while @j < @ailength

  begin

    Set @strTmp = '0'+@strTmp

    Set @j = @j+1

  end

  Set @strTmp = @scode + @strTmp

end



RETURN(@strTmp)

END

就是把一个表的的重复记录去掉，然后重新生成新的不重复的码补上。

...全文

211 18 打赏收藏转发到动态举报

写回复

用AI写文章

18 条回复

切换为时间正序

请发表友善的回复…

发表回复

kylike 2008-05-12

打赏
举报



这个存储过程删除重复数据与循环在百万级数据量中都非常耗时，特别是

    Set @strSql = '

    Insert  Into #MyTable_fw

       Select a.*

          from #MyTmpeTable a where

            id = (select min(id) from #MyTmpeTable where code_fw = a.code_fw) order by a.id'



   这样相当于两个百万级数据量的大表做联接计算，百万*百万 是万亿的行数。数据量可想而知。强烈改用第2种方法，例如 Insert Into #MyTable_fw Select Distinct ... 的方法。

林g 2008-05-12

打赏
举报

[Quote=引用 1 楼 dobear_0922 的回复:]
好长，看看
[/Quote]

瓦力1981 2008-05-12

打赏
举报

学习学习。。

utpcb 2008-05-09

打赏
举报

建议用这个函数除掉重复的 ROW_NUMBER

2005可以用个

hui717 2008-05-09

打赏
举报

多谢各位，我实验一下

hery2002 2008-05-09

打赏
举报

[Quote=引用 11 楼 Limpire 的回复:]
处理了这两个循环插入，效率应该提升不少
[/Quote]
使用临时表替代循环吧,

Limpire 2008-05-09

打赏
举报

另外，可以将逻辑全部封装在一个 EXEC，用 SELECT INTO 会比 INSERT INTO 快点。

Limpire 2008-05-09

打赏
举报

处理了这两个循环插入，效率应该提升不少

Limpire 2008-05-09

打赏
举报

--从大到小处理物流码

--> Limpire: 添加个标识列tableCount，用于批量补齐码的个数

Create Table #MyTmpeTable(tableCount int identity(0,1), id int,code_fw nvarchar(30),code_wl nvarchar(30), flag_wl int,select_Sum_Count int)

Set @strSql = '

	Insert Into #MyTmpeTable

		Select a.*

			from '+ @tableName + ' a where

				id = (select min(id) from '+@tableName+' where code_wl = a.code_wl) order by a.id'

exec(@strSql)--去掉物流码的所有重复值

set @tableCount=@@identity

--Select @tableCount = Count(id) From #MyTmpeTable --去掉重复值后还剩于的码个数



insert into #MyTmpeTable(id,code_fw,code_wl,flag_wl,select_Sum_Count)

	select tableCount+@tableCount, dbo.Code(@ilength_fw,@scode,tableCount+@tableCount), dbo.Code(@ilength_wl,@scode,tableCount+@tableCount),0,0

		from #MyTmpeTable

			where tableCount+@tableCount<@sumCount

/*

while @tableCount < @sumCount --补齐码的个数

begin

Set @tmpCode_fw = dbo.Code(@ilength_fw,@scode,@tableCount) --生成新码

Set @tmpCode_wl = dbo.Code(@ilength_wl,@scode,@tableCount) --生成新码

Insert into #MyTmpeTable(id,code_fw,code_wl,flag_wl,select_Sum_Count)

Values(@tableCount,@tmpCode_fw,@tmpCode_wl,0,0)

Set @tableCount = @tableCount+1

end

*/



--> Limpire: 下面的操作按类似方式处理

vivadata1 2008-05-09

打赏
举报

while @tableCount < @sumCount --补齐码的个数
begin
Set @tmpCode_fw = dbo.Code(@ilength_fw,@scode,@tableCount) --生成新码
Set @tmpCode_wl = dbo.Code(@ilength_wl,@scode,@tableCount) --生成新码
Insert into #MyTmpeTable(id,code_fw,code_wl,flag_wl,select_Sum_Count)
Values(@tableCount,@tmpCode_fw,@tmpCode_wl,0,0)
Set @tableCount = @tableCount+1
end

能不能做做这样?
CREATE TABLE #Temp1111
(
iDataID INTEGER,
sDataID VARCHAR(100)
)

while @tableCount < @sumCount --补齐码的个数
begin
insert into #Temp1111 VALUES(@tableCount,CONVERT(VARCHAR(100),@tableCount))
Set @tableCount = @tableCount+1
end

inset into #MyTmpeTable
-- SELECT iDataID,dbo.Code(@ilength_fw,@scode,iDataID),dbo.Code(@ilength_wl,@scode,iDataID),0,0
SELECT iDataID,
CASE WHEN len(sDataID)>@ilength_fw-6 THEN substring(sDataID,1,@ilength_fw-6) ELSE RIGHT('000000000000000000000000000000000000000000'+sDataID,@ilength_fw-6) END,
**************,0,0
FROM #Temp1111

hui717 2008-05-09