导入相关库
import pandas as pd
import matplotlib.pyplot as plt
from lightgbm import LGBMClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.metrics import roc_curve
数据预处理
# Load the full dataset from CSV.
test = pd.read_csv("/home/tarena/test/web/day11/data_all.csv")
# The "status" column is the target; all remaining columns are the features.
y = test["status"]
x = test.drop(columns=['status'])
# 70/30 train/test split with a fixed random seed (2018).
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=2018,
)
搭建模型并获取相关值
1,逻辑回归
# Build the model: logistic regression with the library's default settings.
lr = LogisticRegression()
lr.fit(X_train, y_train)
# Hard class predictions on the held-out test set.
y_pre_lr = lr.predict(X_test)
# Accuracy
lr_accuracy = metrics.accuracy_score(y_test, y_pre_lr)
# Precision (the sklearn function is `precision_score`; the previous
# spelling `precicion_score` raised AttributeError).
lr_precision_score = metrics.precision_score(y_test, y_pre_lr)
# Recall
lr_recall_score = metrics.recall_score(y_test, y_pre_lr)
# F1
lr_f1_score = metrics.f1_score(y_test, y_pre_lr)
# AUC: scored on the predicted probability of the positive class rather than
# on hard 0/1 labels, which would collapse the ROC curve to a single
# threshold. Assumes a binary target whose positive class is the second
# entry of `lr.classes_` -- TODO confirm against the data.
lr_roc_auc_score = metrics.roc_auc_score(y_test, lr.predict_proba(X_test)[:, 1])
2,SVM模型
# Linear support vector classifier with the library's default settings.
Svc = LinearSVC()
Svc.fit(X_train, y_train)
# Hard class predictions on the held-out test set. The model is bound to
# `Svc` (capital S); the previous lowercase `svc` raised NameError.
y_pre_svc = Svc.predict(X_test)
# Accuracy (previously computed from the logistic-regression predictions
# `y_pre_lr` by mistake).
svc_accuracy = metrics.accuracy_score(y_test, y_pre_svc)
# Precision (the sklearn function is `precision_score`; the previous
# spelling `precicion_score` raised AttributeError).
svc_precision_score = metrics.precision_score(y_test, y_pre_svc)
# Recall
svc_recall_score = metrics.recall_score(y_test, y_pre_svc)
# F1
svc_f1_score = metrics.f1_score(y_test, y_pre_svc)
# AUC: LinearSVC exposes no predict_proba, so the signed distance to the
# separating hyperplane is used as the ranking score instead of hard labels.
# Assumes a binary target -- TODO confirm against the data.
svc_roc_auc_score = metrics.roc_auc_score(y_test, Svc.decision_function(X_test))
3,决策树
# Decision tree classifier with the library's default settings.
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
# Hard class predictions on the held-out test set.
y_pre_dt = dt.predict(X_test)
# Accuracy
dt_accuracy = metrics.accuracy_score(y_test, y_pre_dt)
# Precision (the sklearn function is `precision_score`; the previous
# spelling `precicion_score` raised AttributeError).
dt_precision_score = metrics.precision_score(y_test, y_pre_dt)
# Recall
dt_recall_score = metrics.recall_score(y_test, y_pre_dt)
# F1
dt_f1_score = metrics.f1_score(y_test, y_pre_dt)
# AUC: scored on the predicted probability of the positive class rather than
# on hard labels. Assumes a binary target whose positive class is the second
# entry of `dt.classes_` -- TODO confirm against the data.
dt_roc_auc_score = metrics.roc_auc_score(y_test, dt.predict_proba(X_test)[:, 1])
4,随机森林
# Random forest classifier with the library's default settings.
Rfc = RandomForestClassifier()
Rfc.fit(X_train, y_train)
# Hard class predictions on the held-out test set.
y_pre_Rfc = Rfc.predict(X_test)
# Accuracy
Rfc_accuracy = metrics.accuracy_score(y_test, y_pre_Rfc)
# Precision (the sklearn function is `precision_score`; the previous
# spelling `precicion_score` raised AttributeError).
Rfc_precision_score = metrics.precision_score(y_test, y_pre_Rfc)
# Recall
Rfc_recall_score = metrics.recall_score(y_test, y_pre_Rfc)
# F1
Rfc_f1_score = metrics.f1_score(y_test, y_pre_Rfc)
# AUC: scored on the predicted probability of the positive class rather than
# on hard labels. Assumes a binary target whose positive class is the second
# entry of `Rfc.classes_` -- TODO confirm against the data.
Rfc_roc_auc_score = metrics.roc_auc_score(y_test, Rfc.predict_proba(X_test)[:, 1])
5,GBDT
# Gradient-boosted decision trees (scikit-learn) with default settings.
Gbdt = GradientBoostingClassifier()
Gbdt.fit(X_train, y_train)
# Hard class predictions on the held-out test set.
y_pre_Gbdt = Gbdt.predict(X_test)
# Accuracy
Gbdt_accuracy = metrics.accuracy_score(y_test, y_pre_Gbdt)
# Precision (the sklearn function is `precision_score`; the previous
# spelling `precicion_score` raised AttributeError).
Gbdt_precision_score = metrics.precision_score(y_test, y_pre_Gbdt)
# Recall
Gbdt_recall_score = metrics.recall_score(y_test, y_pre_Gbdt)
# F1
Gbdt_f1_score = metrics.f1_score(y_test, y_pre_Gbdt)
# AUC: scored on the predicted probability of the positive class rather than
# on hard labels. Assumes a binary target whose positive class is the second
# entry of `Gbdt.classes_` -- TODO confirm against the data.
Gbdt_roc_auc_score = metrics.roc_auc_score(y_test, Gbdt.predict_proba(X_test)[:, 1])
6,XGBoost
# XGBoost classifier with the library's default settings.
Xgb = XGBClassifier()
Xgb.fit(X_train, y_train)
# Hard class predictions on the held-out test set.
y_pre_Xgb = Xgb.predict(X_test)
# Accuracy
Xgb_accuracy = metrics.accuracy_score(y_test, y_pre_Xgb)
# Precision (the sklearn function is `precision_score`; the previous
# spelling `precicion_score` raised AttributeError).
Xgb_precision_score = metrics.precision_score(y_test, y_pre_Xgb)
# Recall
Xgb_recall_score = metrics.recall_score(y_test, y_pre_Xgb)
# F1
Xgb_f1_score = metrics.f1_score(y_test, y_pre_Xgb)
# AUC: scored on the predicted probability of the positive class rather than
# on hard labels. Assumes a binary target whose positive class is the second
# probability column -- TODO confirm against the data.
Xgb_roc_auc_score = metrics.roc_auc_score(y_test, Xgb.predict_proba(X_test)[:, 1])
7,lightgbm
# LightGBM classifier with the library's default settings.
Lgb = LGBMClassifier()
Lgb.fit(X_train, y_train)
# Hard class predictions on the held-out test set.
y_pre_Lgb = Lgb.predict(X_test)
# Accuracy
Lgb_accuracy = metrics.accuracy_score(y_test, y_pre_Lgb)
# Precision (the sklearn function is `precision_score`; the previous
# spelling `precicion_score` raised AttributeError).
Lgb_precision_score = metrics.precision_score(y_test, y_pre_Lgb)
# Recall
Lgb_recall_score = metrics.recall_score(y_test, y_pre_Lgb)
# F1
Lgb_f1_score = metrics.f1_score(y_test, y_pre_Lgb)
# AUC: scored on the predicted probability of the positive class rather than
# on hard labels. Assumes a binary target whose positive class is the second
# probability column -- TODO confirm against the data.
Lgb_roc_auc_score = metrics.roc_auc_score(y_test, Lgb.predict_proba(X_test)[:, 1])
ROC图打印
# Continuous positive-class scores for every fitted model on the test set.
# Fixes relative to the previous version:
#   * the test features are `X_test` (capital X); `x_test` was never defined;
#   * the fitted estimators are `Svc`, `Rfc`, `dt`, `Gbdt`, `Xgb`, `Lgb`;
#     the lowercase `svc`/`rfc`/`clf`/`gbc`/`xgb`/`lgb` were never defined.
#     `clf` is taken to mean the decision tree `dt`, the only fitted model
#     with no other counterpart here -- TODO confirm;
#   * LinearSVC has no predict_proba, so its decision_function is used.
# The `y_score_*`, `fpr_*`, `tpr_*` and `thresholds_*` names are kept as before.
y_score_lr = lr.predict_proba(X_test)[:, 1]
y_score_svc = Svc.decision_function(X_test)
y_score_rfc = Rfc.predict_proba(X_test)[:, 1]
y_score_clf = dt.predict_proba(X_test)[:, 1]
y_score_gbc = Gbdt.predict_proba(X_test)[:, 1]
y_score_xgb = Xgb.predict_proba(X_test)[:, 1]
y_score_lgb = Lgb.predict_proba(X_test)[:, 1]

# ROC points per model, reusing the scores above instead of predicting twice.
fpr_lr, tpr_lr, thresholds_lr = roc_curve(y_test, y_score_lr, pos_label=1)
fpr_svc, tpr_svc, thresholds_svc = roc_curve(y_test, y_score_svc, pos_label=1)
fpr_rfc, tpr_rfc, thresholds_rfc = roc_curve(y_test, y_score_rfc, pos_label=1)
fpr_clf, tpr_clf, thresholds_clf = roc_curve(y_test, y_score_clf, pos_label=1)
fpr_gbc, tpr_gbc, thresholds_gbc = roc_curve(y_test, y_score_gbc, pos_label=1)
fpr_xgb, tpr_xgb, thresholds_xgb = roc_curve(y_test, y_score_xgb, pos_label=1)
fpr_lgb, tpr_lgb, thresholds_lgb = roc_curve(y_test, y_score_lgb, pos_label=1)

# One curve per model; labels feed the legend so the colours can be told apart.
plt.figure(figsize=[6, 6])
plt.plot(fpr_lr, tpr_lr, color='black', label='Logistic Regression')
plt.plot(fpr_svc, tpr_svc, color='red', label='Linear SVC')
plt.plot(fpr_rfc, tpr_rfc, color='green', label='Random Forest')
plt.plot(fpr_clf, tpr_clf, color='blue', label='Decision Tree')
plt.plot(fpr_gbc, tpr_gbc, color='yellow', label='GBDT')
plt.plot(fpr_xgb, tpr_xgb, color='brown', label='XGBoost')
plt.plot(fpr_lgb, tpr_lgb, color='purple', label='LightGBM')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.legend(loc='lower right')
# Render the figure when the code runs as a plain script.
plt.show()
结果展示
对各种库的使用还不够熟悉,尤其是绘图库(matplotlib)的使用还需要多加练习。
参考文章:
https://blog.csdn.net/tttwister/article/details/81159487
https://blog.csdn.net/tttwister/article/details/81138865