高斯拟合在数据集上返回直线？答案

【问题标题】：Gaussian fit returning straight line on data set?高斯拟合在数据集上返回直线？
【发布时间】：2019-09-24 19:21:58
【问题描述】：

我正在尝试使用与上一篇文章相同的技术将高斯拟合到一些数据点： Fitting a better gaussian to data points?

但是，无论我怎么尝试，似乎都无法得到正确的拟合结果。不管初始猜测值是什么，最终得到的总是一条直线。与上一篇帖子唯一的不同之处是我的数据集。

代码如下：

  import matplotlib.pyplot as plt
from scipy import asarray as ar, exp, sqrt
from scipy.optimize import curve_fit
import numpy as np

# Measured scan: the sampled positions and the value recorded at each one.
# (The plot further down labels these axes "2*theta" and "Counts".)
angles = [37, 38, 39, 39.33, 39.66, 40, 40.33, 40.66, 41, 41.33, 41.66, 42, 43]
data = [1612, 1710, 1755, 2692, 4082, 5988, 6672, 6579, 6506, 3865, 2244, 2042, 2057]
# Convert the plain lists to arrays (`ar` is the asarray alias imported above)
# so the element-wise arithmetic below works.
angles = ar(angles)
data = ar(data)

# Number of samples; not referenced again in this snippet.
n = len(data)

# Initial guesses for the Gaussian centre and width, later passed to curve_fit as p0.
# NOTE(review): this divides the plain sum of the angles (~523) by the sum of the
# counts (~47804), giving roughly 0.011 -- far outside the 37-43 range of `angles`.
# A count-weighted mean would be sum(angles*data)/sum(data).
mean = sum(angles)/sum(data)
# Count-weighted spread of the angles around `mean`.
sigma = sqrt(sum(data*(angles-mean)**2)/sum(data))

def gaus(x,a,mu,sigma):
    """Gaussian-shaped peak of height ``a`` centred at ``mu`` on a fixed baseline of 1500.

    NOTE(review): the exponent divides by ``(2*sigma)**2`` (i.e. ``4*sigma**2``)
    rather than the conventional ``2*sigma**2``, so the fitted ``sigma`` equals the
    standard deviation divided by sqrt(2).  The 1500 baseline is hard-coded and is
    not a fit parameter.
    """
    return a*np.exp(-(x-mu)**2/((2*sigma)**2))+1500

# Fit the model, starting from the guesses computed above.
# NOTE(review): the arguments are passed as (gaus, data, angles), so the counts are
# used as the independent variable and the angles as the values to reproduce,
# whereas the plot below evaluates gaus over `angles` and compares it with `data`.
popt,pcov = curve_fit(gaus,data,angles,p0=[max(data),mean,sigma])

# popt holds the optimised values for (a, mu, sigma), in gaus's parameter order.
print(popt)
fig = plt.figure()
# Measured points as blue circles, fitted curve as a red line.
plt.plot(angles, data, "ob", label = "Measured")
plt.plot(angles,gaus(angles,*popt),'r',label='Fit')
# Fixed axis limits chosen around the data range.
plt.xlim(36, 45)
plt.ylim(1000, 8000)
#plt.xticks(angles)
plt.title("Gaussian Fit")
plt.xlabel("2*theta")
plt.ylabel("Counts")
plt.grid()
plt.legend()
plt.show()

【问题讨论】：

您对均值（mean）和标准差（sigma）的初始猜测是错误的。正确的猜测是 mean = sum(angles)/sum(data) 和 sigma = sqrt(sum(data*(angles-mean)**2)/sum(data))
写题的时候不小心把坐标调换了。 37-43 的角度值应该是 x 轴。
我认为上面的评论成立。试试看。
进行该更改会创建一条对角线并引发错误：OptimizeWarning：无法估计参数的协方差。我编辑了上面的主要代码以反映更改。

标签： scipy curve-fitting

【解决方案1】：

我无法用高斯峰方程拟合这些数据；从数据的散点图可以看出，数据本身并不具有高斯峰的形状。不过，我能够用汉密尔顿（Hamilton）峰方程“y = Gb * pow(x / mu, log(mu/x)/(B*B)) + (Vbmax * x) / (x + sigma_b)”来拟合它。下面是一个带图形输出的 Python 拟合程序，它使用您的数据对该方程进行拟合。

import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings

# sample points for the fit, held as float arrays
xData = numpy.asarray(
    [1612, 1710, 1755, 2692, 4082, 5988, 6672, 6579, 6506, 3865, 2244, 2042, 2057],
    dtype=numpy.float64,
)
yData = numpy.asarray(
    [37, 38, 39, 39.33, 39.66, 40, 40.33, 40.66, 41, 41.33, 41.66, 42, 43],
    dtype=numpy.float64,
)


def func(x, Gb, mu, B, Vbmax, sigma_b):
    """Hamilton peak equation (from zunzun.com).

    Evaluates ``Gb * (x/mu) ** (log(mu/x) / B**2) + Vbmax * x / (x + sigma_b)``
    element-wise for scalar or array ``x``.
    """
    exponent = numpy.log(mu / x) / (B * B)
    peakTerm = Gb * numpy.power(x / mu, exponent)
    saturationTerm = (Vbmax * x) / (x + sigma_b)
    return peakTerm + saturationTerm


# objective minimized by the genetic algorithm: sum of squared residuals
def sumOfSquaredError(parameterTuple):
    """Return the sum of squared differences between ``yData`` and the model.

    ``parameterTuple`` is unpacked as the parameters of ``func``, which is
    evaluated over ``xData``.
    """
    # silence all warnings so the genetic algorithm's trial evaluations stay quiet
    warnings.filterwarnings("ignore")
    residuals = yData - func(xData, *parameterTuple)
    return numpy.sum(residuals ** 2.0)


def generate_Initial_Parameters():
    """Search for starting parameters of ``func`` with differential evolution.

    The search box for each parameter is derived from the extents of ``xData``
    and ``yData``; the best parameter vector found is returned.
    """
    # data extents, used as the limits of the search box
    xLow, xHigh = min(xData), max(xData)
    yLow, yHigh = min(yData), max(yData)

    searchBounds = [
        [0.0, yHigh],   # Gb
        [xLow, xHigh],  # mu
        [0.0, 1.0],     # B
        [yLow, yHigh],  # Vbmax
        [0.0, xLow],    # sigma_b
    ]

    # a fixed seed keeps the search repeatable from run to run
    return differential_evolution(sumOfSquaredError, searchBounds, seed=3).x

# starting values come from the bounded differential-evolution search
# (its default final polishing step stays inside the search bounds)
geneticParameters = generate_Initial_Parameters()

# refine with an unbounded curve_fit, in case the best-fit parameters
# lie outside the bounds used by the genetic algorithm
fittedParameters, pcov = curve_fit(func, xData, yData, p0=geneticParameters)
print('Fitted parameters:', fittedParameters, end='\n\n')

# residuals of the fitted model against the observations
modelPredictions = func(xData, *fittedParameters)
absError = modelPredictions - yData

# goodness-of-fit statistics
SE = numpy.square(absError)  # squared errors
MSE = numpy.mean(SE)  # mean squared error
RMSE = numpy.sqrt(MSE)  # root mean squared error
Rsquared = 1.0 - numpy.var(absError) / numpy.var(yData)

print()
print('RMSE:', RMSE)
print('R-squared:', Rsquared, end='\n\n')


##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Show the raw data as markers with the fitted model curve drawn over them.

    ``graphWidth`` and ``graphHeight`` give the figure size in pixels; the
    figure is created at 100 dpi.
    """
    dotsPerInch = 100
    figure = plt.figure(
        figsize=(graphWidth / 100.0, graphHeight / 100.0),
        dpi=dotsPerInch,
    )
    ax = figure.add_subplot(111)

    # fitted curve sampled at 100 evenly spaced x values across the data range
    curveX = numpy.linspace(min(xData), max(xData), 100)
    curveY = func(curveX, *fittedParameters)

    ax.plot(xData, yData, 'D')  # raw data as diamond markers
    ax.plot(curveX, curveY)  # model as a line

    ax.set_xlabel('X Data')
    ax.set_ylabel('Y Data')

    plt.show()
    plt.close('all')  # release pyplot's figures once the window is closed

# draw the plot in an 800 x 600 pixel figure
graphWidth, graphHeight = 800, 600
ModelAndScatterPlot(graphWidth, graphHeight)

更新（根据评论以及转置后的数据）：将偏移量作为高斯峰方程的一个拟合参数，初始参数估计值取自数据本身。

import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

# sample points for the fit, held as float arrays
xData = numpy.asarray(
    [37, 38, 39, 39.33, 39.66, 40, 40.33, 40.66, 41, 41.33, 41.66, 42, 43],
    dtype=numpy.float64,
)
yData = numpy.asarray(
    [1612, 1710, 1755, 2692, 4082, 5988, 6672, 6579, 6506, 3865, 2244, 2042, 2057],
    dtype=numpy.float64,
)


def func(x, a, b, c, offset):
    """Gaussian peak of height ``a``, centre ``b`` and width ``c`` on a constant ``offset``.

    Evaluates ``a * exp(-0.5 * ((x - b) / c) ** 2) + offset`` element-wise for
    scalar or array ``x``.
    """
    scaledDistance = (x - b) / c
    bell = numpy.exp(-0.5 * numpy.power(scaledDistance, 2.0))
    return a * bell + offset


# initial parameter estimates from the data
# The baseline and the peak height are both quantities on the y axis, so they
# are estimated from yData: the lowest observation approximates the offset and
# the peak height is what remains above it.
offset = min(yData)
a = max(yData) - offset
# start the peak centre at the midpoint of the sampled x range
b = (max(xData) - min(xData)) / 2.0 + min(xData)
c = 1.0 # my guess from the equation
initialParameters = numpy.array([a, b, c, offset])

# curve fit the test data
fittedParameters, pcov = curve_fit(func, xData, yData, initialParameters)

# residuals of the fitted model against the observations
modelPredictions = func(xData, *fittedParameters)

absError = modelPredictions - yData

SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))

print('Parameters:', fittedParameters)
print('RMSE:', RMSE)
print('R-squared:', Rsquared)

print()


##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Plot the data points together with the fitted curve and display the figure.

    ``graphWidth`` and ``graphHeight`` are the figure dimensions in pixels;
    the figure is created at 100 dpi.
    """
    widthInches = graphWidth / 100.0
    heightInches = graphHeight / 100.0
    fig = plt.figure(figsize=(widthInches, heightInches), dpi=100)
    plotArea = fig.add_subplot(111)

    # evaluate the fitted model on an evenly spaced grid spanning the data
    # (numpy.linspace's default number of samples is used)
    gridX = numpy.linspace(min(xData), max(xData))
    gridY = func(gridX, *fittedParameters)

    # data first as diamond markers, then the model as a line on top
    plotArea.plot(xData, yData, 'D')
    plotArea.plot(gridX, gridY)

    # axis captions
    plotArea.set_xlabel('X Data')
    plotArea.set_ylabel('Y Data')

    plt.show()
    plt.close('all')  # release pyplot's figures once the window is closed

# draw the plot in an 800 x 600 pixel figure
graphWidth, graphHeight = 800, 600
ModelAndScatterPlot(graphWidth, graphHeight)

【讨论】：

看来我在输入问题时不小心调换了 x 和 y 坐标。这就是它看起来不像高斯分布的原因。
请查看我在回答中标记为“更新”的编辑内容，其中使用的是转置后的数据。