【问题标题】:SQL - Find duplicate values and remove in a field(SQL - 查找字段中的重复值并将其删除)
【发布时间】:2013-09-10 04:34:12
【问题描述】:

考虑 Article 表中名为 ArticleCategories 的列。目标是从每行的 ArticleCategories 列中删除重复的数字。

ArticleCategories
------------
3193;2867;3193;
2871;2923;2923;
3278;3337;3337;
2878;2876;2878;
3720;3680;3680;

关于如何使用 SQL Server 完成此任务的任何帮助?

想要的结果

ArticleCategories
------------
3193;2867;
2871;2923;
3278;3337;
2878;2876;
3720;3680;

如果有人可以提供帮助,请提前致谢。

【问题讨论】:

标签: sql sql-server


【解决方案1】:

首先创建函数SplitAndRemoveDuplicates,它将用分隔符分割给定字符串,删除重复项,并返回字符串:

-- Splits @s on @sep, drops empty tokens and repeated tokens (keeping the first
-- occurrence of each), and returns the surviving tokens, each followed by @sep.
--
-- Parameters:
--   @sep  delimiter to split on; also used as the terminator in the result.
--   @s    delimited list, e.g. '3193;2867;3193;'.
-- Returns:
--   the de-duplicated list, e.g. '3193;2867;'.
--   @s unchanged when @s is NULL or @sep is NULL/empty (nothing to split on).
--   '' when @s contains no non-empty token.
-- Limitations:
--   tokens are compared with the database default collation;
--   an input containing the literal sequence ']]>' cannot be parsed.
CREATE FUNCTION [dbo].[SplitAndRemoveDuplicates] (@sep VARCHAR(32), @s VARCHAR(MAX))
RETURNS VARCHAR(MAX)
AS
BEGIN
    -- id records the position of each token so the original order can be kept.
    DECLARE @t TABLE (id INT IDENTITY(1, 1) PRIMARY KEY, val VARCHAR(MAX));
    DECLARE @xml XML;
    DECLARE @result VARCHAR(MAX);

    IF @s IS NULL OR @sep IS NULL OR DATALENGTH(@sep) = 0
        RETURN @s;

    -- Wrap every token in CDATA so characters such as '&' and '<' in the data
    -- do not make the XML cast fail.
    SET @xml = N'<root><r><![CDATA['
             + REPLACE(@s, @sep, ']]></r><r><![CDATA[')
             + N']]></r></root>';

    -- NOTE(review): relies on nodes() yielding rows in document order so that
    -- id reflects token position; SQL Server does this in practice - confirm.
    INSERT INTO @t (val)
    SELECT r.value('.', 'VARCHAR(MAX)')
    FROM @xml.nodes('/root/r') AS records(r);

    -- Keep one row per distinct token, ordered by its first position.
    -- TYPE + value() returns the text un-escaped ('&' stays '&', not '&amp;').
    SET @result = (
        SELECT first_seen.val + @sep
        FROM (
            SELECT val, MIN(id) AS first_id
            FROM @t
            WHERE val <> ''
            GROUP BY val
        ) AS first_seen
        ORDER BY first_seen.first_id
        FOR XML PATH(''), TYPE
    ).value('(./text())[1]', 'VARCHAR(MAX)');

    RETURN COALESCE(@result, '');
END

利用函数更新已有数据:

-- Rewrites ArticleCategories on every row of Article with the value returned by
-- dbo.SplitAndRemoveDuplicates; deliberately no WHERE clause (whole table).
UPDATE a
SET a.ArticleCategories = dbo.[SplitAndRemoveDuplicates](';', a.ArticleCategories)
FROM Article AS a

现在你有了一个可重用的函数。

感谢:https://stackoverflow.com/a/314917/455770 和 https://stackoverflow.com/a/3822833/455770

【讨论】:

    【解决方案2】:

    找到了解决我问题的方法,希望对其他人也有帮助。

    -- Returns the distinct, non-empty, trimmed items of a delimited list, in
    -- order of first occurrence, joined with the same delimiter.
    --
    -- Parameters:
    --   @List   delimited list, e.g. '342,34,456,34'.
    --   @Delim  single-character delimiter used for splitting and for joining.
    -- Returns:
    --   the de-duplicated list without a trailing delimiter, e.g. '342,34,456';
    --   NULL when @List is NULL or contains no non-empty item.
    CREATE FUNCTION dbo.DistinctList
    (
        @List VARCHAR(MAX),
        @Delim CHAR
    )
    RETURNS VARCHAR(MAX)
    AS
    BEGIN
        -- Items already emitted; used only for the duplicate check.
        DECLARE @Seen TABLE
        (
            Item VARCHAR(MAX)
        )
        DECLARE @Token VARCHAR(MAX), @Pos INT, @Result VARCHAR(MAX)

        -- Append a delimiter so the last item is terminated like all the others.
        SET @List = LTRIM(RTRIM(@List)) + @Delim
        SET @Pos = CHARINDEX(@Delim, @List, 1)

        WHILE @Pos > 0
        BEGIN
            SET @Token = LTRIM(RTRIM(LEFT(@List, @Pos - 1)))

            -- Build the result while scanning, so the output order is the
            -- input order and no unordered string aggregation is needed.
            IF @Token <> '' AND NOT EXISTS (SELECT 1 FROM @Seen WHERE Item = @Token)
            BEGIN
                INSERT INTO @Seen (Item) VALUES (@Token)
                SET @Result = COALESCE(@Result + @Delim, '') + @Token
            END

            SET @List = SUBSTRING(@List, @Pos + 1, LEN(@List))
            SET @Pos = CHARINDEX(@Delim, @List, 1)
        END

        RETURN @Result
    END
    GO
    -- Example: returns '342,34,456,3454,aa,bb,cc'
    SELECT dbo.DistinctList('342,34,456,34,3454,456,aa,bb,cc,aa', ',') AS DistinctList
    GO
    

    在此感谢脚本的作者。 http://blog.sqlauthority.com/2009/01/15/sql-server-remove-duplicate-entry-from-comma-delimited-string-udf/

    【讨论】:

      【解决方案3】:

      不使用游标(cursor),也不使用 WHILE 循环;这是本站之前两个问题的答案的组合

      1. Split Columns
      2. Join The Rows Back

      所以答案是创建一个string Split function

      -- Splits @delimited on @delimiter and returns one row per token
      -- (empty tokens included).
      --
      -- Parameters:
      --   @delimited  the delimited string.
      --   @delimiter  the separator to split on.
      -- Returns a table with:
      --   id   identity value assigned on insert.
      --        NOTE(review): it matches the token position only if nodes()
      --        yields rows in document order, which SQL Server does in
      --        practice - confirm before relying on it.
      --   val  the token text.
      -- Limitation: an input containing the literal sequence ']]>' cannot be parsed.
      CREATE FUNCTION [dbo].[StringSplit]
      (
        @delimited nvarchar(max),
        @delimiter nvarchar(100)
      ) RETURNS @t TABLE
      (
        id int identity(1,1),
        val nvarchar(max)
      )
      AS
      BEGIN
        declare @xml xml

        -- CDATA sections let tokens contain '&' and '<' without breaking the XML cast.
        set @xml = N'<root><r><![CDATA['
                 + replace(@delimited, @delimiter, N']]></r><r><![CDATA[')
                 + N']]></r></root>'

        -- Extract as nvarchar(max): the input and the val column are Unicode, and
        -- going through varchar(max) would replace characters outside the code page.
        insert into @t(val)
        select
          r.value('.','nvarchar(max)') as item
        from @xml.nodes('/root/r') as records(r)

        RETURN
      END
      

      查询看起来像:

      -- Demo: de-duplicate the ';'-separated values in every row of @Article
      -- using dbo.StringSplit, keeping each value's first occurrence.
      declare @Article table
      (
        ArticleCategories varchar(100)
      )

      insert into @Article (ArticleCategories) values
      ('3193;2867;3193;'),
      ('2871;2923;2923;'),
      ('3278;3337;3337;'),
      ('2878;2876;2878;'),
      ('3720;3680;3680;')

      -- Tokens: one row per (original string, distinct value). FirstPos is the
      -- smallest StringSplit id of that value, so this query requires the id
      -- column of dbo.StringSplit to be present.
      ;WITH Tokens AS (
          SELECT a.ArticleCategories, s.val, MIN(s.id) AS FirstPos
          FROM @Article AS a
          CROSS APPLY dbo.[StringSplit](a.ArticleCategories, ';') AS s
          WHERE s.val <> ''
          GROUP BY a.ArticleCategories, s.val
      ),
      -- Rebuilt: the distinct values of each original string glued back together,
      -- each followed by ';' (no extra spaces), in first-occurrence order.
      Rebuilt AS (
          SELECT
              src.ArticleCategories,
              (
                  SELECT CAST(t.val AS VARCHAR(MAX)) + ';'
                  FROM Tokens AS t
                  WHERE t.ArticleCategories = src.ArticleCategories
                  ORDER BY t.FirstPos
                  FOR XML PATH(''), TYPE
              ).value('(./text())[1]', 'VARCHAR(MAX)') AS DistinctArticleCategories
          FROM (SELECT DISTINCT ArticleCategories FROM Tokens) AS src
      )
      UPDATE a
      SET ArticleCategories = r.DistinctArticleCategories
      FROM @Article AS a
      INNER JOIN Rebuilt AS r ON r.ArticleCategories = a.ArticleCategories
      

      【讨论】:

      • 你采取了和我一样的方法,为此投了赞成票。我的更简单......让我知道你的想法:stackoverflow.com/a/18711930/455770
      • @Rots 我也给你投了赞成票。我认为你的方案在大表上表现会更好。
      【解决方案4】:

      尝试以下解决方案

      首先创建一个UDF,如下所示

      -- Splits @inputStr on @del, trims each token, drops empty and repeated
      -- tokens (keeping the first occurrence), and returns the remaining tokens,
      -- each followed by the delimiter.
      --
      -- Parameters:
      --   @inputStr  delimited list, e.g. '3193;2867;3193;'.
      --   @del       delimiter (may be more than one character).
      -- Returns:
      --   the de-duplicated list, e.g. '3193;2867;'; NULL when @inputStr is NULL.
      --   When @del is NULL or empty the whole input is treated as one token and
      --   terminated with ';'.
      --
      -- CREATE (not ALTER) so the script works on a database that does not have
      -- the function yet; switch to ALTER when redefining an existing function.
      CREATE FUNCTION dbo.SplitString2(@inputStr varchar(1000), @del varchar(5))
      RETURNS varchar(max)
      AS
      BEGIN
          -- Tokens already emitted; sized like the input so nothing is truncated.
          DECLARE @seen table (token varchar(1000))
          DECLARE @ret varchar(max), @rest varchar(1000), @token varchar(1000)
          DECLARE @pos int, @delLen int, @outDel varchar(5)

          IF @inputStr IS NULL
              RETURN NULL

          -- DATALENGTH, not LEN: LEN ignores trailing spaces in the delimiter.
          SET @delLen = COALESCE(DATALENGTH(@del), 0)
          SET @outDel = CASE WHEN @delLen = 0 THEN ';' ELSE @del END
          SET @ret = ''
          SET @rest = @inputStr

          -- Plain loop instead of a recursive CTE: no 100-level recursion limit.
          WHILE 1 = 1
          BEGIN
              SET @pos = CASE WHEN @delLen = 0 THEN 0 ELSE CHARINDEX(@del, @rest) END
              SET @token = LTRIM(RTRIM(CASE WHEN @pos > 0 THEN LEFT(@rest, @pos - 1) ELSE @rest END))

              -- Skip empty tokens (e.g. the one after a trailing delimiter) and repeats.
              IF @token <> '' AND NOT EXISTS (SELECT 1 FROM @seen WHERE token = @token)
              BEGIN
                  INSERT INTO @seen (token) VALUES (@token)
                  SET @ret = @ret + @token + @outDel
              END

              IF @pos = 0
                  BREAK

              -- Skip the whole delimiter, not just its first character.
              SET @rest = SUBSTRING(@rest, @pos + @delLen, 1000)
          END

          RETURN @ret
      END
      

      然后您可以根据需要运行以下选择/更新查询

      更新查询——这将更新您表中的记录

      -- Rewrites ArticleCategories on every row (deliberately no WHERE clause).
      -- The table in the question is Article; ArticleCategories is its column.
      update Article Set ArticleCategories = dbo.SplitString2(ArticleCategories, ';' )
      

      选择查询——这将只选择不同的记录

      -- Shows each stored value next to its de-duplicated form without modifying
      -- the table. The table in the question is Article.
      Select ArticleCategories, dbo.SplitString2(ArticleCategories, ';') AS DistinctArticleCategories from Article
      

      【讨论】:

        【解决方案5】:

        如果重复的行中保留哪一行都无关紧要:

        -- Deletes every Article row for which another row holds the same
        -- ArticleCategories value and a smaller %%physloc%% value.
        -- Note: this removes whole rows; it does not edit the text inside a field.
        DELETE FROM Article
        WHERE EXISTS (
            SELECT 1
            FROM Article AS t
            WHERE t.ArticleCategories = Article.ArticleCategories
              AND t.%%physloc%% < Article.%%physloc%%
        );
        

        这就是我在 SQLite 中的做法,不确定我是否匹配 MS SQL Server 的语法...

        【讨论】:

          【解决方案6】:

          请尝试以下解决方案,它已针对您的情况进行了全面测试,希望对您有所帮助。

          -- For every distinct ArticleCategories value in Article, rebuild the
          -- ';'-separated list without empty or repeated entries (first occurrence
          -- wins, every entry terminated by ';') and write it back when it changed.
          DECLARE @ArticleCategories varchar(max)
          DECLARE @token varchar(max), @rest varchar(max), @deduped varchar(max), @pos int

          -- STATIC: iterate over a snapshot, so the UPDATEs below cannot feed rows
          -- back into the cursor. DISTINCT: the UPDATE matches rows by value, so
          -- each value needs to be processed only once.
          DECLARE Cursor_Article CURSOR LOCAL FORWARD_ONLY STATIC READ_ONLY FOR
              SELECT DISTINCT ArticleCategories FROM Article
          OPEN Cursor_Article
          FETCH NEXT FROM Cursor_Article INTO @ArticleCategories
          WHILE @@FETCH_STATUS = 0
          BEGIN
             SET @deduped = ''
             SET @rest = @ArticleCategories

             WHILE LEN(@rest) > 0
             BEGIN
                 SET @pos = CHARINDEX(';', @rest)
                 IF @pos = 0
                 BEGIN
                     -- Last entry without a terminating ';' - keep it, do not drop it.
                     SET @token = @rest
                     SET @rest = ''
                 END
                 ELSE
                 BEGIN
                     SET @token = LEFT(@rest, @pos - 1)
                     SET @rest = SUBSTRING(@rest, @pos + 1, DATALENGTH(@rest))
                 END

                 SET @token = LTRIM(RTRIM(@token))

                 -- Whole-entry comparison: surrounding ';' on both sides prevents
                 -- '31' from being treated as a duplicate of '3193'.
                 IF @token <> '' AND CHARINDEX(';' + @token + ';', ';' + @deduped) = 0
                     SET @deduped = @deduped + @token + ';'
             END

             -- Rows are matched by their current value; if the table has a primary
             -- key, fetch it in the cursor and use it in this WHERE clause instead.
             -- A NULL value makes the comparison unknown and is left untouched.
             IF @deduped <> @ArticleCategories
                 UPDATE Article SET ArticleCategories = @deduped WHERE ArticleCategories = @ArticleCategories

             FETCH NEXT FROM Cursor_Article INTO @ArticleCategories
          END
          CLOSE Cursor_Article;
          DEALLOCATE Cursor_Article;
          

          【讨论】:

            【解决方案7】:
            -- One row per distinct ArticleCategories value (DISTINCT form).
            SELECT DISTINCT a.ArticleCategories
            FROM Article AS a
            

            -- The same result set, expressed with GROUP BY.
            SELECT a.ArticleCategories FROM Article AS a GROUP BY a.ArticleCategories
            

            这是删除重复值

            -- Deletes duplicate rows: within each group of rows sharing the same
            -- ArticleCategories value, one arbitrary row is kept and the others are
            -- removed. (Filtering with NOT IN against the grouped values of the same
            -- column can never match a row, so such a DELETE removes nothing.)
            -- Note: this removes whole rows; it does not edit the text inside a field.
            ;WITH RankedArticles AS (
               SELECT ROW_NUMBER() OVER (PARTITION BY ArticleCategories ORDER BY (SELECT NULL)) AS rn
               FROM Article
            )
            DELETE FROM RankedArticles
            WHERE rn > 1
            

            【讨论】:

            • 目标是从每行的 ArticleCategories 列中删除重复的数字。此答案不会删除它们。
            • 我认为您没有正确解释这个问题。根据我的阅读,OP 具有单独的值,例如 3193;2867;3193; - 其中分号实际上是字段值的一部分 - 并且(s)他希望编写一个 UPDATE 语句来修改这些值以删除重复项 - - 产生诸如3193;2867; 之类的值。
            • 此外,即使按照您显然采用的理解,您的 DELETE 语句也没有任何意义:它的意思是,从 Article 中删除其 ArticleCategories 值未出现在任何 Article 行的 ArticleCategories 中的行。这永远不会删除任何东西。
            • 现在您正在回答问题,但恐怕您的回答不正确。
            • @user2763202 不要太担心。这是一个问答网站,所以答案需要准确 - 请不要往心里去。
            猜你喜欢
            • 1970-01-01
            • 1970-01-01
            • 1970-01-01
            • 2019-02-17
            • 1970-01-01
            • 2015-07-03
            • 1970-01-01
            • 2012-01-16
            • 2020-08-24
            相关资源
            最近更新 更多