Train the algorithms

GaussianNB


# Synthetic binary-classification dataset: 1000 samples, 10 features
# (2 informative, 2 redundant, none repeated).
# NOTE(review): the original also called datasets.load_iris() but never used
# the result, so that dead load is dropped.
dataset = datasets.make_classification(n_samples=1000, n_features=10,
                                       n_informative=2, n_redundant=2,
                                       n_repeated=0, n_classes=2)

# 10-fold cross-validation over the 1000 samples.
kf = cross_validation.KFold(1000, n_folds=10, shuffle=True)

# Per-fold scores, accumulated so the reported metrics cover every fold
# (the flattened original trained and scored only once, after the loop,
# on whichever fold the loop variables last held), and so this section
# matches the list-accumulation style of the SVC / RandomForest sections.
acc = []
f1 = []
auc = []

# Split the data for each fold, then train and evaluate on that fold.
for train_index, test_index in kf:
    X_train, X_test = dataset[0][train_index], dataset[0][test_index]
    y_train, y_test = dataset[1][train_index], dataset[1][test_index]

    # Gaussian Naive Bayes classifier: fit on the training split,
    # predict the held-out split.
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)

    acc.append(metrics.accuracy_score(y_test, pred))
    f1.append(metrics.f1_score(y_test, pred))
    auc.append(metrics.roc_auc_score(y_test, pred))

结果如下:

第十五周作业 —— sklearn


SVC (possible C values [1e-02, 1e-01, 1e00, 1e01, 1e02], RBF kernel)

def rbf_svm(X_train, y_train, X_test, C):
    """Fit an RBF-kernel SVM with penalty C on the training split and
    return its predictions for X_test."""
    model = SVC(C=C, kernel='rbf', class_weight='balanced')
    model.fit(X_train, y_train)
    return model.predict(X_test)

# Number of samples.
n_sam = 1000

# Synthetic binary-classification dataset: 10 features
# (2 informative, 2 redundant, none repeated).
# NOTE(review): the original also called datasets.load_iris() but never used
# the result, so that dead load is dropped.
dataset = datasets.make_classification(n_samples=n_sam, n_features=10,
                                       n_informative=2, n_redundant=2,
                                       n_repeated=0, n_classes=2)

# Outer 10-fold cross-validation.
kf = cross_validation.KFold(n_sam, n_folds=10, shuffle=True)

# Per-outer-fold scores.
accuracy = []
f1 = []
auc_roc = []

# The extraction flattened the original's nesting; the loops below restore
# it: outer fold -> candidate C -> inner 5-fold validation.
for train_index, test_index in kf:
    X_train, X_test = dataset[0][train_index], dataset[0][test_index]
    y_train, y_test = dataset[1][train_index], dataset[1][test_index]
    nn = len(X_train)
    Cvalues = [1e-2, 1e-1, 1e0, 1e1, 1e2]
    innerscore = []

    # Inner model selection: score every candidate C with 5-fold CV on the
    # training split only (never touching the outer test fold).
    for C in Cvalues:
        ikf = cross_validation.KFold(nn, n_folds=5, shuffle=True,
                                     random_state=5678)
        innerf1 = []
        for t_index, v_index in ikf:
            X_t, X_v = X_train[t_index], X_train[v_index]
            y_t, y_v = y_train[t_index], y_train[v_index]

            ipred = rbf_svm(X_t, y_t, X_v, C)

            # Keep each inner fold's F1 score.
            innerf1.append(metrics.f1_score(y_v, ipred))

        innerscore.append(sum(innerf1) / len(innerf1))

    # Select the C with the best mean inner F1 score.
    bestC = Cvalues[np.argmax(innerscore)]

    # Refit with the chosen C on the full training split and evaluate on
    # the held-out outer fold.
    pred = rbf_svm(X_train, y_train, X_test, bestC)

    accuracy.append(metrics.accuracy_score(y_test, pred))
    f1.append(metrics.f1_score(y_test, pred))
    auc_roc.append(metrics.roc_auc_score(y_test, pred))

结果如下:

第十五周作业 —— sklearn

RandomForestClassifier (possible n estimators values [10, 100, 1000])

# Build and apply a classifier for a given forest size.
def rbf_ranf(X_train, y_train, X_test, nest):
    """Train a random forest with `nest` trees on the training split and
    return its predictions for X_test."""
    forest = RandomForestClassifier(n_estimators=nest)
    forest.fit(X_train, y_train)
    return forest.predict(X_test)
# Number of samples.
n_sam = 1000

# Synthetic binary-classification dataset: 10 features
# (2 informative, 2 redundant, none repeated).
# NOTE(review): the original also called datasets.load_iris() but never used
# the result, so that dead load is dropped.
dataset = datasets.make_classification(n_samples=n_sam, n_features=10,
                                       n_informative=2, n_redundant=2,
                                       n_repeated=0, n_classes=2)

# Outer 10-fold cross-validation.
kf = cross_validation.KFold(n_sam, n_folds=10, shuffle=True)

# Per-outer-fold scores.
accuracy = []
f1 = []
auc_roc = []

# The extraction flattened the original's nesting; the loops below restore
# it: outer fold -> candidate n_estimators -> inner 5-fold validation.
for train_index, test_index in kf:
    X_train, X_test = dataset[0][train_index], dataset[0][test_index]
    y_train, y_test = dataset[1][train_index], dataset[1][test_index]
    nn = len(X_train)
    N_est = [10, 100, 1000]
    innerscore = []

    # Inner model selection: score every candidate n_estimators with
    # 5-fold CV on the training split only.
    for nest in N_est:
        ikf = cross_validation.KFold(nn, n_folds=5, shuffle=True,
                                     random_state=5678)
        innerf1 = []
        for t_index, v_index in ikf:
            X_t, X_v = X_train[t_index], X_train[v_index]
            y_t, y_v = y_train[t_index], y_train[v_index]

            ipred = rbf_ranf(X_t, y_t, X_v, nest)

            innerf1.append(metrics.f1_score(y_v, ipred))

        innerscore.append(sum(innerf1) / len(innerf1))

    # Select the forest size with the best mean inner F1 score.
    bestN = N_est[np.argmax(innerscore)]

    # Refit with the chosen size on the full training split and evaluate
    # on the held-out outer fold.
    pred = rbf_ranf(X_train, y_train, X_test, bestN)

    accuracy.append(metrics.accuracy_score(y_test, pred))
    f1.append(metrics.f1_score(y_test, pred))
    auc_roc.append(metrics.roc_auc_score(y_test, pred))

结果如下:

第十五周作业 —— sklearn


结论:从三种方法的交叉检验结果来看,随机森林分类器的表现最好。

相关文章:

  • 2021-05-30
  • 2022-01-03
猜你喜欢
  • 2021-04-27
  • 2021-08-24
  • 2021-09-12
  • 2021-09-03
相关资源
相似解决方案