题目描述:
代码:
# Compare Gaussian Naive Bayes, an RBF-kernel SVM and a small random forest on
# a synthetic two-class dataset, using shuffled 10-fold splits.
#
# `sklearn.cross_validation` was removed from scikit-learn (0.20+); the
# equivalent splitter lives in `sklearn.model_selection`, takes `n_splits`
# instead of `n_folds`, and yields index pairs from `.split(X)` rather than
# being iterated directly.
from sklearn import datasets
from sklearn import metrics
from sklearn import model_selection
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC


def evaluate_classifier(clf, X_train, y_train, X_test, y_test):
    """Fit *clf* on the training split and print its test-split results.

    Prints, in order: the predicted labels, the true labels, accuracy,
    F1 score and ROC AUC.

    Args:
        clf: An unfitted estimator exposing ``fit`` and ``predict``.
        X_train: Training feature rows.
        y_train: Training labels.
        X_test: Held-out feature rows.
        y_test: Held-out labels.

    Returns:
        A ``(accuracy, f1, auc)`` tuple for this split.
    """
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    print(pred)
    print(y_test)

    acc = metrics.accuracy_score(y_test, pred)
    print(acc)
    f1 = metrics.f1_score(y_test, pred)
    print(f1)
    # NOTE(review): AUC is computed from hard class predictions, exactly as in
    # the original script; feeding decision scores or probabilities instead
    # would give a threshold-independent AUC.
    auc = metrics.roc_auc_score(y_test, pred)
    print(auc)
    return acc, f1, auc


# Step 1: generate a synthetic binary classification dataset.
d, t = datasets.make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
)

# Step 2: shuffled 10-fold split over the sample indices.
kf = model_selection.KFold(n_splits=10, shuffle=True)

# NOTE(review): the original script was collapsed onto a single line, so it is
# not recoverable whether steps 3 and 4 ran inside this loop or once after it
# (on the last fold only). They are placed inside the loop so that every fold
# is evaluated -- confirm this matches the intended behaviour.
for train_index, test_index in kf.split(d):
    X_train, y_train = d[train_index], t[train_index]
    X_test, y_test = d[test_index], t[test_index]
    print(X_train)
    print(y_train)
    print(X_test)
    print(y_test)

    # Steps 3 and 4: train and score each model on this fold. The tuple is
    # rebuilt on every iteration, so each fold starts from unfitted estimators.
    for clf in (
        GaussianNB(),  # Naive Bayes
        SVC(C=1e-01, kernel='rbf', gamma=0.1),  # SVM
        RandomForestClassifier(n_estimators=6),  # Random Forest
    ):
        evaluate_classifier(clf, X_train, y_train, X_test, y_test)

# Step 5 is intentionally ignored.