尝试在所有列以及 ID 不同的地方将表连接到自身...
CREATE TABLE Dups
(
ID int IDENTITY(1,1) PRIMARY KEY,
Col1 int NOT NULL,
Col2 date NOT NULL,
Col3 char(1) NOT NULL,
Col4 char(1) NOT NULL
)
INSERT dbo.Dups (Col1,Col2,Col3,Col4)
VALUES ('1','20170925','A','Z'), ('1','20170925','A','Z'), ('1','20170925','A','Z'), ('2','20170925','A','Z'), ('2','20170925','A','Z'), ('2','20170925','A','Z'), ('3','20170925','A','Z');
SELECT * FROM Dups;
-- This solution to retain the first ID found that is duplicated...
DELETE FROM Dups
WHERE ID IN (
SELECT ID
FROM (
SELECT d1.ID,
row_number() OVER (ORDER BY d1.ID) AS DupSeq
FROM dbo.Dups AS d1
INNER JOIN dbo.Dups AS d2 ON d2.Col1 = d1.Col1 AND d2.Col2 = d1.Col2 AND d2.Col3 = d1.Col3 AND d2.Col4 = d1.Col4
WHERE d1.ID <> d2.ID
) AS t
WHERE DupSeq > 1
);
-- This solution to retain the last ID found that is duplicated...
DELETE FROM Dups
WHERE ID NOT IN (
SELECT DISTINCT
max(t.ID) OVER(PARTITION BY t.Col1,t.Col2,t.Col3,t.Col4 ORDER BY WindowOrder) AS KeepID
FROM (
SELECT d1.ID,
d1.Col1,
d1.Col2,
d1.Col3,
d1.Col4,
1 AS WindowOrder
FROM dbo.Dups AS d1
LEFT OUTER JOIN dbo.Dups AS d2 ON d2.Col1 = d1.Col1
AND d2.Col2 = d1.Col2
AND d2.Col3 = d1.Col3
AND d2.Col4 = d1.Col4
AND d1.ID <> d2.ID
) AS t
);
SELECT * FROM Dups;
DROP TABLE dbo.Dups
您需要在第一个解决方案中使用 row_number(),因为显然 ID1 将匹配 ID3,因此 ID3 也将匹配 ID1。
在第二种解决方案中,连接是 LEFT OUTER 以保留那些不重复的值。