【发布时间】:2014-12-13 05:15:38
【问题描述】:
我正在尝试优化一个查询,它使用 ROW_NUMBER 并带有许多 PARTITION BY 列,用于从数据库中获取唯一记录。查询如下:
-- Loads recent suspect data points whose screener belongs to database 'db'
-- into #Temp, numbering the rows inside each group of
-- (RecordID, Field, ScreenerID, PeriodEndDate, PeriodID, Source, SuspectDetails)
-- by ascending UploadDate.
-- Presumably a later step keeps only RowNum = 1 to de-duplicate -- confirm against the caller.
-- NOTE(review): rows that tie on UploadDate inside a group are numbered in an
-- arbitrary order; add a unique tie-breaker (e.g. SuspectID) if that matters.
SELECT
     SDP.SuspectID
    ,SDP.RecordID
    ,SDP.Field
    ,SDP.FieldEntryDate
    ,SDP.ScreenDate
    ,SDP.SuspectDetails
    -- Style 120 truncated to 10 characters yields 'yyyy-mm-dd'.
    ,CONVERT(VARCHAR(10), SDP.PeriodEndDate, 120) AS PeriodEndDate
    ,ROW_NUMBER() OVER (
         PARTITION BY
              SDP.RecordID
             ,SDP.Field
             ,SDP.ScreenerID
             ,SDP.PeriodEndDate
             ,SDP.PeriodID
             ,SDP.[Source]
             ,SDP.SuspectDetails
         ORDER BY SDP.UploadDate
     ) AS RowNum
    ,SDP.DatabaseAccountCode
    ,SDP.RecordUpdateType
INTO #Temp
FROM dbo.SuspectDataPoint AS SDP
-- The filter on S.DatabaseName below rejects the NULL-extended rows a LEFT JOIN
-- would produce, so the join was already behaving as an inner join; say so.
INNER JOIN dbo.Screener AS S
    ON S.ScreenerID = SDP.ScreenerID
WHERE SDP.ScreenerID <> 719
  AND S.DatabaseName = 'db'
  -- Returns the same rows as
  --     CONVERT(DATE, SDP.FieldEntryDate) > DATEADD(dd, -24, GETDATE())
  -- A midnight-truncated day is greater than "now - 24 days" exactly when it is
  -- on or after the day following that instant's calendar date. Comparing the
  -- bare column keeps the predicate a plain range on FieldEntryDate, which the
  -- single-column index on that column and its statistics can serve directly.
  AND SDP.FieldEntryDate >= DATEADD(dd, 1, CONVERT(DATE, DATEADD(dd, -24, GETDATE())));
我尝试手动查找此查询中效率最低的部分是什么,结果发现使用 ROW_NUMBER 时它的运行时间比没有它时长 10 倍以上。
我还检查了执行计划来确认这一发现,但其中有些地方我不明白。如果按上面的写法运行查询,执行计划显示成本最高的是插入临时表(45%)。去掉 ROW_NUMBER 那一行后运行同一查询,情况也一样,只是百分比略有不同(61%)。我知道在第一个查询中,ROW_NUMBER 的工作发生在 SORT 运算符中,但相比之下它相当便宜(25%)。然而正如我所说,不带和带 ROW_NUMBER 那一行时的实际耗时相差很大(平均 1 秒对 13 秒)。有人能给我解释一下吗?是我把执行计划看错了吗?
编辑:添加了索引概览图
编辑:添加架构和索引脚本
/*
 * Table [dbo].[Screener] (scripted 10/17/2014 5:53:51 PM).
 * One row per ScreenerID, which is the clustered primary key.
 * NOTE(review): the primary key and both DEFAULT constraints are unnamed, so
 * the server will generate their names.
 * NOTE(review): [Script] uses the legacy [text] LOB type -- consider
 * varchar(max) after confirming nothing depends on text-specific behaviour.
 */
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- ANSI_PADDING is ON while the table is created and is switched back OFF afterwards.
SET ANSI_PADDING ON
GO
CREATE TABLE [dbo].[Screener]
(
    [ScreenerID]          [int]             NOT NULL,
    [ScreenerName]        [varchar](255)    NOT NULL,
    [ScreenerDescription] [nvarchar](4000)  NULL,
    [Script]              [text]            NULL,
    [HitRate]             [numeric](10, 2)  NOT NULL DEFAULT ((1)),
    [CreatedOn]           [date]            NULL,
    [CreatedBy]           [varchar](7)      NULL,
    [SuspectReason]       [nvarchar](4000)  NULL,
    [IsExtremeOutlier]    [bit]             NOT NULL DEFAULT ((0)),
    [DatabaseName]        [varchar](20)     NOT NULL,
    PRIMARY KEY CLUSTERED ([ScreenerID] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY]
TEXTIMAGE_ON [PRIMARY]
GO
SET ANSI_PADDING OFF
GO
/*
 * Table [dbo].[SuspectDataPoint] (scripted 10/17/2014 5:53:52 PM).
 * One row per SuspectID, an identity column that is the clustered primary key.
 * UploadDate defaults to getdate() when no value is supplied.
 * NOTE(review): the primary key and the DEFAULT constraint are unnamed, so
 * the server will generate their names.
 */
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- ANSI_PADDING is ON while the table is created and is switched back OFF afterwards.
SET ANSI_PADDING ON
GO
CREATE TABLE [dbo].[SuspectDataPoint]
(
    [SuspectID]           [int] IDENTITY(1,1) NOT FOR REPLICATION NOT NULL,
    [RecordID]            [varchar](30)     NOT NULL,
    [Field]               [varchar](50)     NOT NULL,
    [ScreenerID]          [int]             NOT NULL,
    [ScreenDate]          [datetime]        NOT NULL,
    [SuspectDetails]      [nvarchar](4000)  NULL,
    [PeriodEndDate]       [datetime]        NULL,
    [FieldEntryDate]      [datetime]        NOT NULL,
    [OriginalValue]       [nvarchar](4000)  NULL,
    [Source]              [nvarchar](300)   NULL,
    [UniqueSystemID]      [int]             NOT NULL,
    [DatabaseAccountCode] [varchar](50)     NULL,
    [RecordUpdateType]    [varchar](60)     NULL,
    [UploadDate]          [datetime]        NOT NULL DEFAULT (getdate()),
    [PeriodID]            [varchar](20)     NULL,
    PRIMARY KEY CLUSTERED ([SuspectID] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY]
GO
SET ANSI_PADDING OFF
GO
-- Index [in_n_Screener_DatabaseName] (scripted 10/17/2014 5:53:52 PM):
-- nonclustered, single key column DatabaseName on dbo.Screener.
SET ANSI_PADDING ON
GO
CREATE NONCLUSTERED INDEX [in_n_Screener_DatabaseName]
    ON [dbo].[Screener] ([DatabaseName] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Index [in_n_SuspectDataPoint_DatabaseAccountCode] (scripted 10/17/2014 5:53:52 PM):
-- nonclustered, single key column DatabaseAccountCode on dbo.SuspectDataPoint.
SET ANSI_PADDING ON
GO
CREATE NONCLUSTERED INDEX [in_n_SuspectDataPoint_DatabaseAccountCode]
    ON [dbo].[SuspectDataPoint] ([DatabaseAccountCode] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Index [in_n_SuspectDataPoint_Field] (scripted 10/17/2014 5:53:52 PM):
-- nonclustered, single key column Field on dbo.SuspectDataPoint.
SET ANSI_PADDING ON
GO
CREATE NONCLUSTERED INDEX [in_n_SuspectDataPoint_Field]
    ON [dbo].[SuspectDataPoint] ([Field] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Index [in_n_SuspectDataPoint_RecordID] (scripted 10/17/2014 5:53:52 PM):
-- nonclustered, single key column RecordID on dbo.SuspectDataPoint.
SET ANSI_PADDING ON
GO
CREATE NONCLUSTERED INDEX [in_n_SuspectDataPoint_RecordID]
    ON [dbo].[SuspectDataPoint] ([RecordID] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Index [in_n_SuspectDataPoint_ScreenerID] (scripted 10/17/2014 5:53:52 PM):
-- nonclustered on dbo.SuspectDataPoint, keyed by ScreenerID and carrying
-- ScreenDate as an included (non-key) column.
CREATE NONCLUSTERED INDEX [in_n_SuspectDataPoint_ScreenerID]
    ON [dbo].[SuspectDataPoint] ([ScreenerID] ASC)
    INCLUDE ([ScreenDate])
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Index [in_n_SuspectDataPoint_UniqueSystemID] (scripted 10/17/2014 5:53:52 PM):
-- nonclustered, single key column UniqueSystemID on dbo.SuspectDataPoint.
CREATE NONCLUSTERED INDEX [in_n_SuspectDataPoint_UniqueSystemID]
    ON [dbo].[SuspectDataPoint] ([UniqueSystemID] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Index [SuspectDataPoint_FieldEntryDate_Index] (scripted 10/17/2014 5:53:52 PM):
-- nonclustered, single key column FieldEntryDate on dbo.SuspectDataPoint.
CREATE NONCLUSTERED INDEX [SuspectDataPoint_FieldEntryDate_Index]
    ON [dbo].[SuspectDataPoint] ([FieldEntryDate] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Index [SuspectDataPoint_RecordUpdateType_Index] (scripted 10/17/2014 5:53:52 PM):
-- nonclustered, single key column RecordUpdateType on dbo.SuspectDataPoint.
SET ANSI_PADDING ON
GO
CREATE NONCLUSTERED INDEX [SuspectDataPoint_RecordUpdateType_Index]
    ON [dbo].[SuspectDataPoint] ([RecordUpdateType] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Index [SuspectDataPoint_ScreenDate_Index] (scripted 10/17/2014 5:53:52 PM):
-- nonclustered, single key column ScreenDate on dbo.SuspectDataPoint.
CREATE NONCLUSTERED INDEX [SuspectDataPoint_ScreenDate_Index]
    ON [dbo].[SuspectDataPoint] ([ScreenDate] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Index [SuspectDataPoint_Source_Index] (scripted 10/17/2014 5:53:52 PM):
-- nonclustered, single key column Source on dbo.SuspectDataPoint.
SET ANSI_PADDING ON
GO
CREATE NONCLUSTERED INDEX [SuspectDataPoint_Source_Index]
    ON [dbo].[SuspectDataPoint] ([Source] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Index [SuspectDataPoint_SuspectID_Index] (scripted 10/17/2014 5:53:52 PM):
-- unique nonclustered, single key column SuspectID on dbo.SuspectDataPoint.
-- NOTE(review): the table's CREATE TABLE script already declares SuspectID as
-- the clustered primary key, so this index looks redundant -- confirm nothing
-- relies on it before dropping.
CREATE UNIQUE NONCLUSTERED INDEX [SuspectDataPoint_SuspectID_Index]
    ON [dbo].[SuspectDataPoint] ([SuspectID] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        IGNORE_DUP_KEY = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Index [SuspectDataPoint_UploadDate_Index] (scripted 10/17/2014 5:53:52 PM):
-- nonclustered, single key column UploadDate on dbo.SuspectDataPoint.
CREATE NONCLUSTERED INDEX [SuspectDataPoint_UploadDate_Index]
    ON [dbo].[SuspectDataPoint] ([UploadDate] ASC)
    WITH (
        PAD_INDEX = OFF,
        STATISTICS_NORECOMPUTE = OFF,
        SORT_IN_TEMPDB = OFF,
        DROP_EXISTING = OFF,
        ONLINE = OFF,
        ALLOW_ROW_LOCKS = ON,
        ALLOW_PAGE_LOCKS = ON
    )
    ON [PRIMARY]
GO
-- Foreign key from Screener.DatabaseName to DatabaseInfo.DatabaseName.
-- WITH CHECK validates existing rows when the constraint is added, and
-- ON UPDATE CASCADE propagates changes of the referenced DatabaseName.
ALTER TABLE [dbo].[Screener] WITH CHECK
    ADD CONSTRAINT [FK_Screener_DatabaseInfo]
    FOREIGN KEY ([DatabaseName])
    REFERENCES [dbo].[DatabaseInfo] ([DatabaseName])
    ON UPDATE CASCADE
GO
-- Explicitly (re-)enable the constraint for subsequent data changes.
ALTER TABLE [dbo].[Screener]
    CHECK CONSTRAINT [FK_Screener_DatabaseInfo]
GO
【问题讨论】:
-
您能否将您的架构和索引设置添加到问题中?
-
糟糕,抱歉,编辑不正确。我在查询中没有看到这两个数据库的任何架构,我会尽快发布它们的索引
-
虽然图像有助于快速了解概况,但最好还是为表和索引添加 SQL 脚本。右键单击数据库,选择 Tasks / Generate Scripts,然后选择表,并在后续页面的高级(Advanced)对话框中启用索引脚本,即可轻松生成代码。您可以输出到剪贴板再粘贴过来。
-
不确定您是否收到有关我的编辑的通知;添加了请求的信息。
-
我确实收到了通知,添加的信息有所帮助。一会儿再看
标签: sql-server tsql sql-execution-plan