【发布时间】:2014-12-27 05:06:08
【问题描述】:
我有以下代码块,它使用线性回归(最小二乘法)计算趋势线的公式,并给出 X、Y 数据的 R 平方(R-Squared)以及回归系数 Alpha(截距)和 Beta(斜率)。
当 X 轴和 Y 轴的值是 int 或 float 类型时,它可以正确地计算出结果。
-- Fits the trend line y = Alpha + Beta * x by ordinary least squares.
--
-- Parameter:
--   @Data  XML shaped like <r><n x="..." y="..."/>...</r>; both attributes
--          are read as FLOAT.
-- Returns one row:
--   r_squared  1 - SS_err / SS_tot; reported as 1.0 when SS_tot = 0
--              (every y equals the mean, so there is no variance to explain)
--   Alpha      intercept of the fitted line
--   Beta       slope of the fitted line; NULL when every x is identical
--              (the denominator is zero and NULLIF turns it into NULL),
--              which also makes Alpha NULL
-- Nodes missing either attribute are skipped, so every aggregate below is
-- computed over the same set of complete (x, y) pairs.
CREATE FUNCTION [dbo].[LinearReqression] (@Data AS XML)
RETURNS TABLE AS RETURN (
    -- Raw attribute values; a missing attribute comes back as NULL
    WITH Parsed AS (
        SELECT x = n.value('@x', 'float'),
               y = n.value('@y', 'float')
        FROM @Data.nodes('/r/n') v(n)
    ),
    -- Only complete pairs take part in the regression. Without this filter
    -- AVG(x) and AVG(y) would each ignore a different set of NULLs and the
    -- means, Beta and SS_tot would be computed over mismatched rows.
    Array AS (
        SELECT x, y
        FROM Parsed
        WHERE x IS NOT NULL
          AND y IS NOT NULL
    ),
    -- Arithmetic means of x and y
    Means AS (
        SELECT xbar = AVG(x), ybar = AVG(y)
        FROM Array
    ),
    -- Slope: sum of cross-deviations over sum of squared x-deviations
    BetaCalc AS (
        SELECT Beta = SUM(xdelta * (y - ybar)) / NULLIF(SUM(xdelta * xdelta), 0)
        FROM Array
        CROSS JOIN Means
        CROSS APPLY ( SELECT xdelta = (x - xbar) ) xd
    ),
    -- Intercept: the fitted line passes through (xbar, ybar)
    AlphaCalc AS (
        SELECT Alpha = ybar - xbar * Beta
        FROM Means
        CROSS JOIN BetaCalc
    ),
    -- Total and residual sums of squares
    SSCalc AS (
        SELECT SS_tot = SUM((y - ybar) * (y - ybar)),
               SS_err = SUM((y - (Alpha + Beta * x)) * (y - (Alpha + Beta * x)))
        FROM Array
        CROSS JOIN Means
        CROSS JOIN AlphaCalc
        CROSS JOIN BetaCalc
    )
    SELECT r_squared = CASE WHEN SS_tot = 0 THEN 1.0
                            ELSE 1.0 - ( SS_err / SS_tot ) END,
           Alpha,
           Beta
    FROM AlphaCalc
    CROSS JOIN BetaCalc
    CROSS JOIN SSCalc
)
用法:
-- Sample data: x is a calendar date, y is the measured value.
-- Numeric literals cannot be inserted into a DATE column (SQL Server has no
-- implicit numeric-to-date conversion), so x is given as unambiguous
-- 'YYYYMMDD' date strings.
DECLARE @DataTable TABLE (
    SourceID INT,
    x DATE,
    y FLOAT
) ;
INSERT INTO @DataTable ( SourceID, x, y )
SELECT ID = 0, x = '20140113', y = 1.0
UNION ALL SELECT 1, '20140117', 1
UNION ALL SELECT 2, '20140121', 1.5
UNION ALL SELECT 3, '20140121', 1.75
UNION ALL SELECT 4, '20140122', 1.85
UNION ALL SELECT 5, '20140122', 2
UNION ALL SELECT 6, '20140123', 3
UNION ALL SELECT 7, '20140123', 3
UNION ALL SELECT 8, '20140124', 3.5
UNION ALL SELECT 9, '20140125', 4
UNION ALL SELECT 10, '20140126', 4
UNION ALL SELECT 11, '20140131', 4.5 ;
-- Reference date used to turn each date into a number the regression can use.
-- Any fixed date works: moving it shifts Alpha only; Beta and r_squared stay the same.
DECLARE @Epoch DATE ;
SET @Epoch = '19700101' ;
-- Create and view XML data array
DECLARE @DataXML XML ;
SET @DataXML = (
    SELECT -- x is serialized as the whole number of days since @Epoch
           [@x] = CAST(DATEDIFF(DAY, @Epoch, x) AS VARCHAR(20)),
           -- FLOAT values are formatted in XML like "1.000000000000000e+000", increasing the character count.
           -- Casting to VARCHAR keeps the XML small, but the default FLOAT-to-VARCHAR conversion keeps
           -- at most 6 significant digits, which is enough for this sample data.
           -- The function unpacks both attributes as FLOAT either way.
           [@y] = CAST(y AS VARCHAR(20))
    FROM @DataTable
    FOR XML PATH('n'), ROOT('r') ) ;
SELECT @DataXML ;
-- Get the results (Beta is the change in y per day)
SELECT r_squared, Alpha, Beta
FROM dbo.LinearReqression (@DataXML) ;
在我的情况下,X 轴也可能是日期(date)类型的列。那么如何对日期列进行同样的回归分析呢?
【问题讨论】:
-
日期可以转换为 float(自 1970 年 1 月 1 日以来的小数天)或 bigint(从您选择的任何时间点算起的秒数)
标签: sql sql-server linear-regression