# Imports
import sklearn
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import matplotlib

# `%matplotlib inline` is IPython-only syntax and a SyntaxError in plain
# Python. Issue the same magic through the IPython API so the file parses
# everywhere; outside IPython `get_ipython` is undefined and the step is skipped.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# Select the SimHei font, presumably so the Chinese legend labels used by the
# plots render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Create the synthetic data set
np.random.seed(40)  # fixed seed so the generated data is reproducible
x = np.arange(10, 110)
x_shift = np.random.normal(size=x.shape)  # 100 normally distributed offsets
x = x + x_shift

error = np.random.normal(size=x.shape) * 30  # noise / error term
y = 2 * x + 5 + error  # underlying line y = 2x + 5, plus noise

plt.scatter(x, y)
# Split the data set
from sklearn.model_selection import train_test_split

dataset = list(zip(x, y))  # pair every x with its y

# 80/20 split with a fixed random_state for a repeatable partition.
# (The original bound the training part to `train_selt` but then printed
# `len(train_set)`, which raised NameError.)
train_set, test_set = train_test_split(dataset, test_size=0.2, random_state=30)
print(len(train_set))  # 80

# Unpack the (x, y) pairs into column vectors of shape (n_samples, 1).
trainX = np.array([xi for xi, _ in train_set]).reshape(-1, 1)
trainY = np.array([yi for _, yi in train_set]).reshape(-1, 1)
testX = np.array([xi for xi, _ in test_set]).reshape(-1, 1)
testY = np.array([yi for _, yi in test_set]).reshape(-1, 1)
# Train the model
from sklearn import linear_model

# Build the linear regressor and fit it on the training split.
# fit() returns the estimator itself, so construction and fitting can chain.
linear_regressor = linear_model.LinearRegression().fit(trainX, trainY)

# Predict on the training inputs themselves.
y_predict = linear_regressor.predict(trainX)

# Overlay predictions ('预测点' = predicted points) on the raw training
# samples ('原始点' = original points).
plt.scatter(x=trainX, y=y_predict, marker='*', label='预测点')
plt.scatter(x=trainX, y=trainY, marker='>', label='原始点')
plt.legend()
# Inspect the model's scores on the training split
from sklearn import metrics

# scikit-learn metrics take (y_true, y_pred) in that order. MAE and MSE are
# symmetric, but explained variance and R2 normalise by the variance of
# y_true, so passing the predictions first (as the original did) reports
# wrong scores.
print('平均绝对误差:{}'.format(metrics.mean_absolute_error(trainY, y_predict)))   # mean absolute error
print('均方差:{}'.format(metrics.mean_squared_error(trainY, y_predict)))          # mean squared error
print('解释方差分:{}'.format(metrics.explained_variance_score(trainY, y_predict)))  # explained variance score
print('R2得分:{}'.format(metrics.r2_score(trainY, y_predict)))                    # R2 score
# Validate the model on the test set
from sklearn import linear_model

# Predict on the held-out test inputs; this rebinds y_predict, replacing the
# training-set predictions.
y_predict = linear_regressor.predict(testX)

# Overlay predictions ('预测点' = predicted points) on the true test samples
# ('原始点' = original points).
plt.scatter(x=testX, y=y_predict, marker='*', label='预测点')
plt.scatter(x=testX, y=testY, marker='>', label='原始点')
plt.legend()
# Saving and loading the model
# `sklearn.externals.joblib` was removed from scikit-learn; joblib is now a
# standalone package. Fall back to the bundled copy only on old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Destination file for the serialized model
save_path = './linearmodel.txt'

# Save the fitted model
joblib.dump(linear_regressor, save_path)

# Load it back and predict for x = 100
mymodel = joblib.load(save_path)
result = mymodel.predict([[100]])

# Recompute the same prediction by hand from the fitted line y = k*x + b.
# The original read `mymodel.coef-[0][0]` (typo for `coef_`) and then used an
# undefined `K` with a hard-coded 6 in place of the intercept.
k = mymodel.coef_[0][0]
b = mymodel.intercept_[0]
fy = k * 100 + b  # should agree with result[0][0]

print(result)
#模型的加载