【问题标题】:Can I improve the performance of this JSON conversion in SQL Server 2016?我可以在 SQL Server 2016 中提高这种 JSON 转换的性能吗?
【发布时间】:2018-03-06 01:22:38
【问题描述】:

我有一个类似于以下的表格 (以下代码创建了一个名为#Temp 的表。它有 160,000 行,这与我在真实数据集中使用的行数大致相同,但在真实数据集中有更多列):

-- Create dummy employees: a one-column table #employeeId holding the
-- consecutive ids 1 .. 16000.
-- The recursive CTE seeds the sequence with 1 and adds 1 on every
-- recursion step while the previous value is still below 16000.
WITH seq (employeeId) AS
(
    SELECT 1
    UNION ALL
    SELECT seq.employeeId + 1
    FROM seq
    WHERE seq.employeeId < 16000
)
SELECT seq.employeeId
INTO #employeeId
FROM seq
OPTION (MAXRECURSION 16000);  -- allow the recursion to run up to 16000 levels deep


-- Create the sale items: a small lookup table of ten sample products,
-- each described by category, sub-category and product name.
CREATE TABLE #SalesItems
(
    category    VARCHAR(100),
    subCategory VARCHAR(100),
    productName VARCHAR(1000)
);

INSERT INTO #SalesItems (category, subCategory, productName)
VALUES
    ('Furniture',       'Bookcases',   'Bush Somerset Collection Bookcase'),
    ('Furniture',       'Chairs',      'Hon Deluxe Fabric Upholstered Stacking Chairs, Rounded Back'),
    ('Office Supplies', 'Labels',      'Self-Adhesive Address Labels for Typewriters by Universal'),
    ('Furniture',       'Tables',      'Bretford CR4500 Series Slim Rectangular Table'),
    ('Office Supplies', 'Storage',     'Eldon Fold n Roll Cart System'),
    ('Furniture',       'Furnishings', 'Eldon Expressions Wood and Plastic Desk Accessories, Cherry Wood'),
    ('Office Supplies', 'Art',         'Newell 322'),
    ('Technology',      'Phones',      'Mitel 5320 IP Phone VoIP phone'),
    ('Office Supplies', 'Binders',     'DXL Angle-View Binders with Locking Rings by Samsill'),
    ('Technology',      'Phones',      'Samsung Galaxy S8');

-- Build #Temp: every employee in #employeeId paired with every product
-- in #SalesItems, plus six monthly sales figures per pair.
-- Each figure is 10 + 90 * RAND(seed), converted to DECIMAL(13, 2), i.e. a
-- value between 10 and 100. RAND is seeded with CHECKSUM(NEWID()) so that a
-- fresh seed is supplied for every call.
-- The pairing is an intentional cross product, so it is written as an
-- explicit CROSS JOIN rather than an inner join on an always-true predicate.
SELECT
    e.employeeId,
    s.category,
    s.subCategory,
    s.productName,
    CONVERT(DECIMAL(13, 2), 10 + (100 - 10) * RAND(CHECKSUM(NEWID()))) AS [Jul 2017],
    CONVERT(DECIMAL(13, 2), 10 + (100 - 10) * RAND(CHECKSUM(NEWID()))) AS [Aug 2017],
    CONVERT(DECIMAL(13, 2), 10 + (100 - 10) * RAND(CHECKSUM(NEWID()))) AS [Sep 2017],
    CONVERT(DECIMAL(13, 2), 10 + (100 - 10) * RAND(CHECKSUM(NEWID()))) AS [Oct 2017],
    CONVERT(DECIMAL(13, 2), 10 + (100 - 10) * RAND(CHECKSUM(NEWID()))) AS [Nov 2017],
    CONVERT(DECIMAL(13, 2), 10 + (100 - 10) * RAND(CHECKSUM(NEWID()))) AS [Dec 2017]
INTO #Temp
FROM #employeeId AS e
CROSS JOIN #SalesItems AS s;

-- Index #Temp on employeeId, the column the per-employee lookups filter on.
CREATE INDEX empId ON #Temp (employeeId);

-- Return the whole generated data set, every column in table order.
SELECT
    t.employeeId,
    t.category,
    t.subCategory,
    t.productName,
    t.[Jul 2017],
    t.[Aug 2017],
    t.[Sep 2017],
    t.[Oct 2017],
    t.[Nov 2017],
    t.[Dec 2017]
FROM #Temp AS t;

我正在做的是将这些结果转换为表中每个员工 ID 的单个 json 字符串。我的查询如下:

-- One result row per employee: the employeeId and, in column "data", a JSON
-- array built from all of that employee's rows in #Temp.
-- The dotted aliases place the six month values under a nested
-- "salesAmounts" object; INCLUDE_NULL_VALUES keeps keys whose value is NULL.
-- The outer query reads every row of #Temp and relies on DISTINCT to
-- collapse the repeated (employeeId, data) pairs.
SELECT DISTINCT
    emp.employeeId,
    (
        SELECT
            item.category,
            item.subCategory,
            item.productName,
            item.[Jul 2017] AS 'salesAmounts.Jul 2017',
            item.[Aug 2017] AS 'salesAmounts.Aug 2017',
            item.[Sep 2017] AS 'salesAmounts.Sep 2017',
            item.[Oct 2017] AS 'salesAmounts.Oct 2017',
            item.[Nov 2017] AS 'salesAmounts.Nov 2017',
            item.[Dec 2017] AS 'salesAmounts.Dec 2017'
        FROM #Temp AS item
        WHERE item.employeeId = emp.employeeId
        FOR JSON PATH, INCLUDE_NULL_VALUES
    ) AS data
FROM #Temp AS emp;

这行得通,但它的性能不是很好。在此示例中,执行此操作需要 25 秒,而在我的真实数据集中需要更长的时间。相比之下,从 #Temp 表返回所有结果只需要 1 秒。有没有办法重写我的查询以缩短查询时间?我也尝试过使用游标遍历每个 employeeId 并以这种方式逐个生成 JSON 字符串,但性能仍然很糟糕。

【问题讨论】:

    标签: sql json sql-server tsql sql-server-2016


    【解决方案1】:

    Read "Performance Surprises and Assumptions : GROUP BY vs. DISTINCT" by Aaron Bertrand

    尝试使用 GROUP BY 而不是 DISTINCT。DISTINCT 是在结果集创建之后才丢弃重复项,因此调用 JSON 的次数比实际需要的多。GROUP BY 应该先将集合缩减为不同的 employeeId 值,然后对每个值只执行一次 JSON。

    目前无法对其进行测试,但这应该会做同样的事情,只是更快:

    -- One result row per employee: the employeeId and, in column "data", a
    -- JSON array built from all of that employee's rows in #Temp.
    -- The dotted aliases place the six month values under a nested
    -- "salesAmounts" object; INCLUDE_NULL_VALUES keeps keys whose value is NULL.
    -- Grouping by employeeId yields one outer row per employee.
    -- The GROUP BY column is spelled exactly like the table column
    -- (employeeId), so the query also resolves when identifiers are compared
    -- case-sensitively.
    SELECT
        emp.employeeId,
        (
            SELECT
                item.category,
                item.subCategory,
                item.productName,
                item.[Jul 2017] AS 'salesAmounts.Jul 2017',
                item.[Aug 2017] AS 'salesAmounts.Aug 2017',
                item.[Sep 2017] AS 'salesAmounts.Sep 2017',
                item.[Oct 2017] AS 'salesAmounts.Oct 2017',
                item.[Nov 2017] AS 'salesAmounts.Nov 2017',
                item.[Dec 2017] AS 'salesAmounts.Dec 2017'
            FROM #Temp AS item
            WHERE item.employeeId = emp.employeeId
            FOR JSON PATH, INCLUDE_NULL_VALUES
        ) AS data
    FROM #Temp AS emp
    GROUP BY emp.employeeId;
    

    【讨论】:

    • 谢谢。我不敢相信我没想过要尝试这个。没错,它在 1 秒内运行。
    • @Liv,很高兴为您提供帮助。 Aaron Bertrand 是对的:Performance Surprise :-D
    猜你喜欢
    • 2019-10-19
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2020-07-11
    • 1970-01-01
    • 2012-02-11
    • 1970-01-01
    相关资源
    最近更新 更多