import numpy as np import matplotlib.pyplot as plt import matplotlib matplotlib.rcParams['font.sans-serif'] = ['SimHei'] matplotlib.rcParams['font.family']='sans-serif' matplotlib.rcParams['axes.unicode_minus'] = False
In [2]:
def loadDataSet(filename):
X = []
Y = []
with open(filename, 'rb') as f:
for idx, line in enumerate(f):
line = line.decode('utf-8').strip()
if not line:
continue
eles = line.split()
if idx == 0:
numFea = len(eles)
eles = map(float, eles)
X.append(eles[:-1])
Y.append([eles[-1]])
return np.array(X), np.array(Y)
我们的假设函数是:
hθ(x)=θXhθ(x)=θX
X:m∗nX:m∗n
θ:n∗1θ:n∗1
hθ:m∗1hθ:m∗1
In [3]:
def h(theta, X):
return np.dot(X, theta)
我们的代价函数是:
J(θ0,θ1)=12m∑i=1m(hθ(x(i))−y(i))2J(θ0,θ1)=12m∑i=1m(hθ(x(i))−y(i))2
In [4]:
def J(theta, X, Y):
m = len(X)
return np.sum(np.dot((h(theta,X)-Y).T , (h(theta,X)-Y)) / (2 * m))
我们的梯度下降更新公式是:
θj:=θj−α1m∑i=1m(hθ(x(i))−y(i))⋅x(i)jθj:=θj−α1m∑i=1m(hθ(x(i))−y(i))⋅xj(i)
In [5]:
def bgd(alpha, maxloop, epsilon, X, Y):
m,n = X.shape # m是样本数,n是特征数(包括了全部是1的x0),其实也就是参数theta的个数
theta = np.zeros((n,1)) # 参数theta全部初始化为0
count = 0 # 记录迭代轮次
converged = False # 是否已经收敛的标志
error = np.inf # 当前的代价函数值
errors = [J(theta, X, Y),] # 记录每一次迭代得代价函数值
thetas = {}
for i in range(n):
thetas[i] = [theta[i, 0], ] # 记录每一个theta j的历史更新
while count<=maxloop:
if(converged):
break
count = count + 1
# 这里,我们的梯度计算统一了
for j in range(n):
deriv = np.dot(X[:,j].T, (h(theta, X) - Y)).sum() / m
thetas[j].append(theta[j, 0] - alpha*deriv)
for j in range(n):
theta[j,0] = thetas[j][-1]
error = J(theta, X, Y)
errors.append(error)
if(abs(errors[-1] - errors[-2]) < epsilon):
converged = True
return theta,errors,thetas
In [6]:
def standarize(X):
"""特征标准化处理
Args:
X 样本集
Returns:
标准后的样本集
"""
m, n = X.shape
# 归一化每一个特征
for j in range(n):
features = X[:,j]
meanVal = features.mean(axis=0)
std = features.std(axis=0)
if std != 0:
X[:, j] = (features-meanVal)/std
else:
X[:, j] = 0
return X
In [7]:
ori_X, Y = loadDataSet('./data/houses.txt') # 从南京链家抓取的夫子庙附近的房屋数据
print ori_X.shape
print Y.shape
(47, 2) (47, 1)
In [8]:
m, n = ori_X.shape X = standarize(ori_X.copy()) X = np.concatenate((np.ones((m ,1)), X), axis=1)
In [9]:
alpha = 1 # 学习率 maxloop = 5000 # 最大迭代次数 epsilon = 0.000001 # 收敛判断条件 result = bgd(alpha, maxloop, epsilon, X, Y) theta, errors, thetas = result
In [10]:
print 'theta:' print theta print '' # 预测价格 normalizedSize = (70-ori_X[:,0].mean(0))/ori_X[:,0].std(0) normalizedBr = (2-ori_X[:,1].mean(0))/ori_X[:,1].std(0) predicateX = np.matrix([[1, normalizedSize, normalizedBr]]) price = h(theta, predicateX) print '70㎡两居估价: ¥%.4f万元'%price
theta: [[275.55319149] [125.4886707 ] [ 3.7666334 ]] 70㎡两居估价: ¥235.9863万元
In [12]:
%matplotlib inline
from mpl_toolkits.mplot3d import axes3d
from matplotlib import cm
import matplotlib.ticker as mtick
# 打印拟合平面
fittingFig = plt.figure(figsize=(16, 12))
title = 'bgd: rate=%.3f, maxloop=%d, epsilon=%.3f \n'%(alpha,maxloop,epsilon)
ax=fittingFig.gca(projection='3d')
xx = np.linspace(0,200,25)
yy = np.linspace(0,5,25)
zz = np.zeros((25,25))
for i in range(25):
for j in range(25):
normalizedSize = (xx[i]-ori_X[:,0].mean(0))/ori_X[:,0].std(0)
normalizedSize = (xx[i]-ori_X[:,0].mean(0))/ori_X[:,0].std(0)
x = np.matrix([[1,normalizedSize, normalizedBr]])
zz[i,j] = h(theta, x)
xx, yy = np.meshgrid(xx,yy)
ax.zaxis.set_major_formatter(mtick.FormatStrFormatter('%.2e'))
ax.plot_surface(xx, yy, zz, rstride=1, cstride=1, cmap=cm.rainbow, alpha=0.1, antialiased=True)
xs = ori_X[:, 0].flatten()
ys = ori_X[:, 1].flatten()
zs = Y[:, 0].flatten()
ax.scatter(xs, ys, zs, c='b', marker='o')
ax.set_xlabel(u'面积')
ax.set_ylabel(u'卧室数')
ax.set_zlabel(u'估价')
#plt.show()
Out[12]:
Text(0.5,0,u'\u4f30\u4ef7')
In [13]:
%matplotlib inline
errorsFig = plt.figure()
ax = errorsFig.add_subplot(111)
ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.2e'))
ax.plot(range(len(errors)), errors)
ax.set_xlabel(u'迭代次数')
ax.set_ylabel(u'代价函数')
Out[13]:
Text(0,0.5,u'\u4ee3\u4ef7\u51fd\u6570')