来自莫烦python教程
线性回归
# Linear regression demo.
# NOTE: datasets.load_boston() was removed in scikit-learn 1.2 (the dataset
# was dropped over ethical concerns), so the original call now raises.
# load_diabetes() is another bundled (no-download) regression dataset and is
# a drop-in replacement: the rest of the workflow is unchanged.
from sklearn import datasets
from sklearn.linear_model import LinearRegression

data = datasets.load_diabetes()
x = data.data
y = data.target

# Choose the linear regression model.
model = LinearRegression()
# Train (fit) the model.
model.fit(x, y)
# Learned parameters: per-feature coefficients and the intercept.
print(model.coef_, model.intercept_)
# Predict on a few samples.
print(x[1:5])
print(model.predict(x[1:5]))
# R^2 score of the model on the training data.
print(model.score(x, y))
支持向量机-数据标准化
# SVM demo: the effect of feature standardization.
# NOTE: the private module sklearn.datasets.samples_generator was deprecated
# in scikit-learn 0.22 and removed in 0.24 — import make_classification from
# sklearn.datasets directly.
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.svm import SVC
import matplotlib.pyplot as plt

# Synthetic 2-feature binary classification problem; scale=100 makes the raw
# features large so that standardization visibly matters for the SVC.
x, y = make_classification(n_samples=300, n_features=2, n_informative=2,
                           n_redundant=0, n_repeated=0, n_classes=2,
                           n_clusters_per_class=1, weights=None, flip_y=0.01,
                           class_sep=1.0, hypercube=True, shift=0.0,
                           scale=100, shuffle=True, random_state=22)
# plt.scatter(x[:, 0], x[:, 1], c=y)
# plt.show()

# Standardize features to zero mean / unit variance (normalization).
x = preprocessing.scale(x)
# Split into training and test sets (30% held out).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.3)

clf = SVC()
clf.fit(x_train, y_train)
print(clf.score(x_test, y_test))
knn-交叉验证
# KNN with k-fold cross-validation on the iris dataset.
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier

iris = load_iris()
x, y = iris.data, iris.target

# 5-nearest-neighbours classifier, evaluated with 5-fold CV on accuracy;
# report the mean accuracy across the folds.
knn = KNeighborsClassifier(n_neighbors=5)
fold_scores = cross_val_score(knn, x, y, cv=5, scoring='accuracy')
print(fold_scores.mean())
## 对于线性回归
# For regression, use a regression scorer. The correct scoring key is
# 'neg_mean_squared_error' — the original string had a stray trailing
# underscore, which makes cross_val_score raise ValueError (unknown scorer).
# The scorer is negated MSE, so negate it again to get a positive loss.
loss = -cross_val_score(knn, x, y, cv=5, scoring='neg_mean_squared_error')
训练次数-损失曲线
# Learning curve: training/validation loss as a function of training-set size.
from sklearn.model_selection import learning_curve
from sklearn.datasets import load_digits
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np

digits = load_digits()
features, labels = digits.data, digits.target

# Evaluate an SVC at 5 training-set sizes with 10-fold CV; the scorer is
# negated MSE, so negate the fold means to plot a positive loss.
sizes, train_scores, test_scores = learning_curve(
    SVC(gamma=0.001), features, labels, cv=10,
    scoring='neg_mean_squared_error',
    train_sizes=[0.1, 0.25, 0.5, 0.75, 1])
mean_train_loss = -train_scores.mean(axis=1)
mean_test_loss = -test_scores.mean(axis=1)

plt.plot(sizes, mean_train_loss, color='r', label='train_loss')
plt.plot(sizes, mean_test_loss, color='b', label='test_loss')
plt.legend()
plt.show()
SVC参数gamma值-损失曲线
# Validation curve: loss as a function of the SVC `gamma` hyperparameter.
from sklearn.model_selection import validation_curve
from sklearn.datasets import load_digits
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np

digits = load_digits()
features, labels = digits.data, digits.target

# Sweep gamma over 5 log-spaced values; 10-fold CV at each value. The scorer
# is negated MSE, so negate the fold means to plot a positive loss.
gammas = np.logspace(-6, -2.3, 5)
train_scores, test_scores = validation_curve(
    SVC(), features, labels, param_name='gamma', param_range=gammas,
    cv=10, scoring='neg_mean_squared_error')
mean_train_loss = -train_scores.mean(axis=1)
mean_test_loss = -test_scores.mean(axis=1)

plt.plot(gammas, mean_train_loss, color='r', label='train_loss')
plt.plot(gammas, mean_test_loss, color='b', label='test_loss')
plt.xlabel('gamma')
plt.ylabel('loss')
plt.legend()
plt.show()
模型保存和读取
# Model persistence: save a fitted model to disk and load it back.
# NOTE: sklearn.externals.joblib was deprecated in scikit-learn 0.21 and
# removed in 0.23 — import the joblib package directly (it is installed as a
# dependency of scikit-learn).
import joblib
from sklearn.datasets import load_iris
from sklearn import svm

data = load_iris()
x, y = data.data, data.target

clf = svm.SVC()
clf.fit(x, y)

joblib.dump(clf, 'clf')    # save the fitted model to file 'clf'
clf2 = joblib.load('clf')  # load it back, ready to predict