逻辑回归最小化错误答案

【问题标题】：Error in Minimization of Logistic Regression逻辑回归最小化错误
【发布时间】：2018-02-13 19:55:55
【问题描述】：

我最近尝试使用 BFGS 方法在 python 中实现逻辑回归。但是，不幸的是，在使用 TNC 方法对我来说效果很好时，我得到了尺寸错误。数据集取自 Andrew Ng 机器学习课程。

对不起，我是 python 新手：）这里是：

import numpy as np
import seaborn as sns
import pandas as pd
from scipy.io import loadmat
from scipy.optimize import minimize

%pylab inline

def Sigmoid(z):
    return 1/(1 + np.exp(-z))

def CostFunction(theta, lambda_, X, y):
    m,n = X.shape
theta = theta.reshape((n, 1))
    y = y.reshape((m,1))
    z = X.dot(theta)
    J = - (1/m) * ((y.T).dot(np.log(Sigmoid((z))) + ((1 - y).T).dot(np.log(1 - Sigmoid(z))) + (lambda_)/(2*m) *((theta[1:].T).dot(theta[1:]))))

    return(J[0]);

def Gradient(theta, lambda_,X, y):
    m, n = X.shape
    theta = theta.reshape((n,1))
    y = y.reshape((m,1))
    grad = np.zeros((n,1))
    z = X.dot(theta)
    grad[0] = (1/m) * (X[:,0].T).dot(Sigmoid(z) - y)
    grad[1:n] = (1/m) * (X[:, 1:n].T).dot(Sigmoid(z) - y) + (lambda_/m) * theta[1:n]
    return(grad)

def ova(X, y, n_labels, lambda_):

    initial_theta = np.zeros((X.shape[1], 1)) #401x1
    all_theta = np.zeros((n_labels, X.shape[1])) #10x401
    for c in np.arange(1, n_labels+1):
        res = minimize(CostFunction, initial_theta, args=(lambda_, X, (y == c)*1), method='BFGS',
                   jac= Gradient, options={'maxiter':150})
        all_theta[c-1] = res.x
    return(all_theta)

def predict1(all_theta, X):
    probs = Sigmoid(X.dot(all_theta.T))
    return(np.argmax(probs, axis=1)+1)

weights = loadmat('ex3weights.mat')
weights.keys()

mat = loadmat('ex3data1.mat')  # load mat-file
xdata = mat['X']
ydata = mat['y']
Theta1, Theta2 = weights['Theta1'], weights['Theta2']
print(Theta1.shape, Theta2.shape)

y = pd.DataFrame(ydata)
X = pd.DataFrame(xdata)
m,n = X.shape
X = X.as_matrix()
y = y.as_matrix()

X = np.insert(X, 0, 1, axis = 1)

theta = ova(X, y, 10, 0.1)

这是我收到的错误消息：

ValueError                                Traceback (most recent call last)
<ipython-input-76-cb508c948a6b> in <module>()
----> 1 theta = ova(X, y, 10, 0.1)

<ipython-input-68-f8aa55870333> in ova(X, y, n_labels, lambda_)
     30     for c in np.arange(1, n_labels+1):
     31         res = minimize(CostFunction, initial_theta, args=(lambda_, X, (y == c)*1), method='BFGS',
---> 32                        jac= Gradient, options={'maxiter':150})
     33         all_theta[c-1] = res.x
     34     return(all_theta)

~/anaconda3/lib/python3.6/site-packages/scipy/optimize/_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
    442         return _minimize_cg(fun, x0, args, jac, callback, **options)
    443     elif meth == 'bfgs':
--> 444         return _minimize_bfgs(fun, x0, args, jac, callback, **options)
    445     elif meth == 'newton-cg':
    446         return _minimize_newtoncg(fun, x0, args, jac, hess, hessp, callback,

~/anaconda3/lib/python3.6/site-packages/scipy/optimize/optimize.py in _minimize_bfgs(fun, x0, args, jac, callback, gtol, norm, eps, maxiter, disp, return_all, **unknown_options)
    932             alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
    933                      _line_search_wolfe12(f, myfprime, xk, pk, gfk,
--> 934                                           old_fval, old_old_fval, amin=1e-100, amax=1e100)
    935         except _LineSearchError:
    936             # Line search failed to find a better solution.

~/anaconda3/lib/python3.6/site-packages/scipy/optimize/optimize.py in _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval, **kwargs)
    763     ret = line_search_wolfe1(f, fprime, xk, pk, gfk,
    764                              old_fval, old_old_fval,
--> 765                              **kwargs)
    766 
    767     if ret[0] is None:

~/anaconda3/lib/python3.6/site-packages/scipy/optimize/linesearch.py in line_search_wolfe1(f, fprime, xk, pk, gfk, old_fval, old_old_fval, args, c1, c2, amax, amin, xtol)
     95         return np.dot(gval[0], pk)
     96 
---> 97     derphi0 = np.dot(gfk, pk)
     98 
     99     stp, fval, old_fval = scalar_search_wolfe1(

ValueError: shapes (401,1) and (401,1) not aligned: 1 (dim 1) != 401 (dim 0)

【问题讨论】：

标签： python numpy optimization machine-learning logistic-regression

【解决方案1】：

下面的错误信息告诉你一些提示 ValueError：形状（401,1）和（401,1）未对齐：1（dim 1）！= 401（dim 0）你认为你正在做一个 10x401.401x1 的矩阵乘法。但事实证明，这两个矩阵都是 401x1。问题可以追溯到你的 def Gradient。

【讨论】：