我有 3 个可能的答案。
你的问题的 sql fiddle 在这里:http://sqlfiddle.com/#!3/7c7a0/3/0
我的答案的 sql 小提琴在这里:http://sqlfiddle.com/#!3/5d257/1
警告:
-
查询分析器还不够 - 我注意到许多答案被拒绝,因为他们的查询计划比原始查询更昂贵。分析仪只是指南。根据实际数据集、硬件和用例,更昂贵的查询可以比更便宜的查询更快地返回结果。您必须在您的环境中进行测试。
-
查询分析器无效 - 即使您找到了从查询中删除“最昂贵的步骤”的方法,它通常对您的查询没有任何影响。
-
单独的查询更改很少能缓解架构/设计问题 - 一些答案被拒绝,因为它们涉及架构级别的更改,例如触发器和附加表。拒绝优化的复杂查询强烈表明问题出在底层设计或我的期望上。您可能不喜欢它,但您可能不得不接受该问题在查询级别无法解决。
-
索引视图不能包含 row_number()/partitition 子句 - 通过创建对象表的六个副本来解决自联接问题不足以让您创建建议的索引视图。我在this sqlfiddle 中尝试过。如果您取消注释最后一个“创建索引”语句,您将收到错误消息,因为您的视图“包含排名或聚合窗口函数”。
工作答案:
-
左连接而不是 row_number() - 您可以使用使用左连接的查询来排除在树中被覆盖的结果。从这个查询中删除最后的“order by”实际上删除了一直困扰你的排序!此查询的执行计划仍然比您原来的更昂贵,但请参阅上面的免责声明 #1。
-
部分查询的索引视图 - 使用一些严肃的查询魔法(基于this technique),我为部分查询创建了一个索引视图。此视图可用于增强原始问题查询或答案#1。
-
实现为索引良好的表 - 其他人提出了这个答案,但他们可能没有很好地解释它。除非您的结果集非常大或者您对源表进行非常频繁的更新,否则实现查询结果并使用触发器使它们保持最新是解决此类问题的完美方法。为查询创建视图后,测试此选项就很容易了。您可以重复使用答案 #2 来加快触发速度,然后随着时间的推移进一步改进它。 (您正在谈论创建表的 六个 个副本,请先尝试此操作。它可以保证您关心的选择的性能将尽可能好。)
这是我从 sqlfiddle 获得的答案的架构部分:
Create Table Objects
(
Id int not null identity primary key,
LeftIndex int not null default 0,
RightIndex int not null default 0
)
alter table Objects add ParentId int null references Objects
CREATE TABLE TP
(
Object1 int not null references Objects,
Object2 int not null references Objects,
Object3 int not null references Objects,
Property varchar(20) not null,
Value varchar(50) not null
)
insert into Objects(LeftIndex, RightIndex) values(1, 10)
insert into Objects(ParentId, LeftIndex, RightIndex) values(1, 2, 5)
insert into Objects(ParentId, LeftIndex, RightIndex) values(1, 6, 9)
insert into Objects(ParentId, LeftIndex, RightIndex) values(2, 3, 4)
insert into Objects(ParentId, LeftIndex, RightIndex) values(3, 7, 8)
insert into TP(Object1, Object2, Object3, Property, Value) values(1,2,3, 'P1', 'abc')
insert into TP(Object1, Object2, Object3, Property, Value) values(1,2,3, 'P2', 'xyz')
insert into TP(Object1, Object2, Object3, Property, Value) values(1,3,4, 'P1', '123')
insert into TP(Object1, Object2, Object3, Property, Value) values(2,4,5, 'P1', '098')
create index ix_LeftIndex on Objects(LeftIndex)
create index ix_RightIndex on Objects(RightIndex)
create index ix_Objects on TP(Property, Value, Object1, Object2, Object3)
create index ix_Prop on TP(Property)
GO
---------- QUESTION ADDITIONAL SCHEMA --------
CREATE VIEW TPResultView AS
Select O1, O2, O3, Property, Value
FROM
(
select Children1.Id as O1, Children2.Id as O2, Children3.Id as O3, tp.Property, tp.Value,
row_number() over(
partition by Children1.Id, Children2.Id, Children3.Id, tp.Property
order by Objects1.LeftIndex desc, Objects2.LeftIndex desc, Objects3.LeftIndex desc
)
as Idx
from tp
-- Select corresponding objects of the triple
inner join Objects as Objects1 on Objects1.Id = tp.Object1
inner join Objects as Objects2 on Objects2.Id = tp.Object2
inner join Objects as Objects3 on Objects3.Id = tp.Object3
-- Then add all possible children of all those objects
inner join Objects as Children1 on Children1.LeftIndex between Objects1.LeftIndex and Objects1.RightIndex
inner join Objects as Children2 on Children2.LeftIndex between Objects2.LeftIndex and Objects2.RightIndex
inner join Objects as Children3 on Children3.LeftIndex between Objects3.LeftIndex and Objects3.RightIndex
) as x
WHERE idx = 1
GO
---------- ANSWER 1 SCHEMA --------
CREATE VIEW TPIntermediate AS
select tp.Property, tp.Value
, Children1.Id as O1, Children2.Id as O2, Children3.Id as O3
, Objects1.LeftIndex as PL1, Objects2.LeftIndex as PL2, Objects3.LeftIndex as PL3
, Children1.LeftIndex as CL1, Children2.LeftIndex as CL2, Children3.LeftIndex as CL3
from tp
-- Select corresponding objects of the triple
inner join Objects as Objects1 on Objects1.Id = tp.Object1
inner join Objects as Objects2 on Objects2.Id = tp.Object2
inner join Objects as Objects3 on Objects3.Id = tp.Object3
-- Then add all possible children of all those objects
inner join Objects as Children1 WITH (INDEX(ix_LeftIndex)) on Children1.LeftIndex between Objects1.LeftIndex and Objects1.RightIndex
inner join Objects as Children2 WITH (INDEX(ix_LeftIndex)) on Children2.LeftIndex between Objects2.LeftIndex and Objects2.RightIndex
inner join Objects as Children3 WITH (INDEX(ix_LeftIndex)) on Children3.LeftIndex between Objects3.LeftIndex and Objects3.RightIndex
GO
---------- ANSWER 2 SCHEMA --------
-- Partial calculation using an indexed view
-- Circumvented the self-join limitation using a black magic technique, based on
-- http://jmkehayias.blogspot.com/2008/12/creating-indexed-view-with-self-join.html
CREATE TABLE dbo.multiplier (i INT PRIMARY KEY)
INSERT INTO dbo.multiplier VALUES (1)
INSERT INTO dbo.multiplier VALUES (2)
INSERT INTO dbo.multiplier VALUES (3)
GO
CREATE VIEW TPIndexed
WITH SCHEMABINDING
AS
SELECT tp.Object1, tp.object2, tp.object3, tp.property, tp.value,
SUM(ISNULL(CASE M.i WHEN 1 THEN Objects.LeftIndex ELSE NULL END, 0)) as PL1,
SUM(ISNULL(CASE M.i WHEN 2 THEN Objects.LeftIndex ELSE NULL END, 0)) as PL2,
SUM(ISNULL(CASE M.i WHEN 3 THEN Objects.LeftIndex ELSE NULL END, 0)) as PL3,
SUM(ISNULL(CASE M.i WHEN 1 THEN Objects.RightIndex ELSE NULL END, 0)) as PR1,
SUM(ISNULL(CASE M.i WHEN 2 THEN Objects.RightIndex ELSE NULL END, 0)) as PR2,
SUM(ISNULL(CASE M.i WHEN 3 THEN Objects.RightIndex ELSE NULL END, 0)) as PR3,
COUNT_BIG(*) as ID
FROM dbo.tp
cross join dbo.multiplier M
inner join dbo.Objects
on (M.i = 1 AND Objects.Id = tp.Object1)
or (M.i = 2 AND Objects.Id = tp.Object2)
or (M.i = 3 AND Objects.Id = tp.Object3)
GROUP BY tp.Object1, tp.object2, tp.object3, tp.property, tp.value
GO
-- This index is mostly useless but required
create UNIQUE CLUSTERED index pk_TPIndexed on dbo.TPIndexed(property, value, object1, object2, object3)
-- Once we have the clustered index, we can create a nonclustered that actually addresses our needs
create NONCLUSTERED index ix_TPIndexed on dbo.TPIndexed(property, value, PL1, PL2, PL3, PR1, PR2, PR3)
GO
-- NOTE: this View is not indexed, but is uses the indexed view
CREATE VIEW TPIndexedResultView AS
Select O1, O2, O3, Property, Value
FROM
(
select Children1.Id as O1, Children2.Id as O2, Children3.Id as O3, tp.Property, tp.Value,
row_number() over(
partition by tp.Property, Children1.Id, Children2.Id, Children3.Id
order by tp.Property, Tp.PL1 desc, Tp.PL2 desc, Tp.PL3 desc
)
as Idx
from TPIndexed as TP WITH (NOEXPAND)
-- Then add all possible children of all those objects
inner join Objects as Children1 WITH (INDEX(ix_LeftIndex)) on Children1.LeftIndex between TP.PL1 and TP.PR1
inner join Objects as Children2 WITH (INDEX(ix_LeftIndex)) on Children2.LeftIndex between TP.PL2 and TP.PR2
inner join Objects as Children3 WITH (INDEX(ix_LeftIndex)) on Children3.LeftIndex between TP.PL3 and TP.PR3
) as x
WHERE idx = 1
GO
-- NOTE: this View is not indexed, but is uses the indexed view
CREATE VIEW TPIndexedIntermediate AS
select tp.Property, tp.Value
, Children1.Id as O1, Children2.Id as O2, Children3.Id as O3
, PL1, PL2, PL3
, Children1.LeftIndex as CL1, Children2.LeftIndex as CL2, Children3.LeftIndex as CL3
from TPIndexed as TP WITH (NOEXPAND)
-- Then add all possible children of all those objects
inner join Objects as Children1 WITH (INDEX(ix_LeftIndex)) on Children1.LeftIndex between TP.PL1 and TP.PR1
inner join Objects as Children2 WITH (INDEX(ix_LeftIndex)) on Children2.LeftIndex between TP.PL2 and TP.PR2
inner join Objects as Children3 WITH (INDEX(ix_LeftIndex)) on Children3.LeftIndex between TP.PL3 and TP.PR3
GO
---------- ANSWER 3 SCHEMA --------
-- You're talking about making six copies of the TP table
-- If you're going to go that far, you might as well, go the trigger route
-- The performance profile is much the same - slower on insert, faster on read
-- And instead of still recalculating on every read, you'll be recalculating
-- only when the data changes.
CREATE TABLE TPResult
(
Object1 int not null references Objects,
Object2 int not null references Objects,
Object3 int not null references Objects,
Property varchar(20) not null,
Value varchar(50) not null
)
GO
create UNIQUE index ix_Result on TPResult(Property, Value, Object1, Object2, Object3)
--You'll have to imagine this trigger, sql fiddle doesn't want to do it
--CREATE TRIGGER tr_TP
--ON TP
-- FOR INSERT, UPDATE, DELETE
--AS
-- DELETE FROM TPResult
-- -- For this example we'll just insert into the table once
INSERT INTO TPResult
SELECT O1, O2, O3, Property, Value
FROM TPResultView
从 sqlfiddle 查询我的部分答案:
-------- QUESTION QUERY ----------
-- Original query, modified to use the view I added
SELECT O1, O2, O3, Property, Value
FROM TPResultView
WHERE property = 'P1' AND value = 'abc'
-- Your assertion is that this order by is the most expensive part.
-- Sometimes converting queries into views allows the server to
-- Optimize them better over time.
-- NOTE: removing this order by has no effect on this query.
-- ORDER BY O1, O2, O3
GO
-------- ANSWER 1 QUERY ----------
-- A different way to get the same result.
-- Query optimizer says this is more expensive, but I've seen cases where
-- it says a query is more expensive but it returns results faster.
SELECT O1, O2, O3, Property, Value
FROM (
SELECT A.O1, A.O2, A.O3, A.Property, A.Value
FROM TPIntermediate A
LEFT JOIN TPIntermediate B ON A.O1 = B.O1
AND A.O2 = B.O2
AND A.O3 = B.O3
AND A.Property = B.Property
AND
(
-- Find any rows with Parent LeftIndex triplet that is greater than this one
(A.PL1 < B.PL1
AND A.PL2 < B.PL2
AND A.PL3 < B.PL3)
OR
-- Find any rows with LeftIndex triplet that is greater than this one
(A.CL1 < B.CL1
AND A.CL2 < B.CL2
AND A.CL3 < B.CL3)
)
-- If this row has any rows that match the previous two cases, exclude it
WHERE B.O1 IS NULL ) AS x
WHERE property = 'P1' AND value = 'abc'
-- NOTE: Removing this order _DOES_ reduce query cost removing the "sort" action
-- that has been the focus of your question.
-- Howeer, it wasn't clear from your question whether this order by was required.
--ORDER BY O1, O2, O3
GO
-------- ANSWER 2 QUERIES ----------
-- Same as above but using an indexed view to partially calculate results
SELECT O1, O2, O3, Property, Value
FROM TPIndexedResultView
WHERE property = 'P1' AND value = 'abc'
-- Your assertion is that this order by is the most expensive part.
-- Sometimes converting queries into views allows the server to
-- Optimize them better over time.
-- NOTE: removing this order by has no effect on this query.
--ORDER BY O1, O2, O3
GO
SELECT O1, O2, O3, Property, Value
FROM (
SELECT A.O1, A.O2, A.O3, A.Property, A.Value
FROM TPIndexedIntermediate A
LEFT JOIN TPIndexedIntermediate B ON A.O1 = B.O1
AND A.O2 = B.O2
AND A.O3 = B.O3
AND A.Property = B.Property
AND
(
-- Find any rows with Parent LeftIndex triplet that is greater than this one
(A.PL1 < B.PL1
AND A.PL2 < B.PL2
AND A.PL3 < B.PL3)
OR
-- Find any rows with LeftIndex triplet that is greater than this one
(A.CL1 < B.CL1
AND A.CL2 < B.CL2
AND A.CL3 < B.CL3)
)
-- If this row has any rows that match the previous two cases, exclude it
WHERE B.O1 IS NULL ) AS x
WHERE property = 'P1' AND value = 'abc'
-- NOTE: Removing this order _DOES_ reduce query cost removing the "sort" action
-- that has been the focus of your question.
-- Howeer, it wasn't clear from your question whether this order by was required.
--ORDER BY O1, O2, O3
GO
-------- ANSWER 3 QUERY ----------
-- Returning results from a pre-calculated table is fast and easy
-- Unless your are doing many more inserts than reads, or your result
-- set is very large, this is a fine way to compensate for a poor design
-- in one area of your database.
SELECT Object1 as O1, Object2 as O2, Object3 as O3, Property, Value
FROM TPResult
WHERE property = 'P1' AND value = 'abc'
ORDER BY O1, O2, O3