不想整理代码了。先给个结果图:
train 0 loss: 1838.0616 train 100 loss: 1441.5283 train 200 loss: 1299.4546 train 300 loss: 934.36536 train 400 loss: 506.06702 train 500 loss: 322.9782 train 600 loss: 271.5825 train 700 loss: 360.091 train 800 loss: 237.25177 train 900 loss: 332.97592 train 1000 loss: 117.5983 train 1100 loss: 173.39397 train 1200 loss: 51.26674 train 1300 loss: 82.82826 train 1400 loss: 74.705734 train 1500 loss: 113.63321 train 1600 loss: 71.29809 train 1700 loss: 38.41456 train 1800 loss: 82.75247 train 1900 loss: 44.553272 test 0,accuracy:0.953125,auc: (0.0, 0.9708618) test 1,accuracy:0.9375,auc: (0.9708618, 0.96028894) test 2,accuracy:0.9609375,auc: (0.96028894, 0.9594982) test 3,accuracy:0.953125,auc: (0.9594982, 0.96195656) test 4,accuracy:0.9375,auc: (0.96195656, 0.9627208)
loss 这么大,结果却这么准确,我也搞不懂是怎么回事。(可能是 loss 按 batch 求和而不是取平均,所以数值偏大——待确认。)
AUC也没什么问题。暂时认为是好的吧。
下面是源码 dataUtil,用来对数据做预处理:
import pandas as pd
import numpy as np


def load_csv(filename, max_columns=39):
    """Load a CSV file and keep only the first ``max_columns`` columns.

    Parameters
    ----------
    filename : str
        Path to the CSV file.
    max_columns : int, optional
        Number of leading columns to keep.  Defaults to 39, preserving
        the original hard-coded cutoff (the trailing columns of the
        patient data file are discarded).

    Returns
    -------
    pandas.DataFrame
    """
    data = pd.read_csv(filename)
    # Drop every column past the cutoff in one call.
    return data.drop(data.columns[max_columns:], axis=1)


def toInt(y):
    """Coerce a single value to ``int`` (kept for backward compatibility)."""
    return int(y)


def split_x_y(data):
    """Split a DataFrame into a feature matrix and the "Label" target.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a "Label" column.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Features (every column except "Label", original order kept)
        and the label vector.
    """
    y = data["Label"]
    x = data.drop("Label", axis=1)  # idiomatic replacement for manual key removal
    return (x.values, y.values)


def max_min_nomalize(X):
    """Normalize every feature column with mean-centered min-max scaling.

    Each column except "Index" and "Label" is transformed as
    ``(x - mean) / (max - min)``.  The caller's DataFrame is not
    mutated, and the original column order is preserved — the previous
    drop-and-reappend implementation silently moved every normalized
    column to the end of the frame.

    Parameters
    ----------
    X : pandas.DataFrame
        Must contain "Index" and "Label" columns; both are left untouched.

    Returns
    -------
    pandas.DataFrame
        A new frame with normalized feature columns.

    Notes
    -----
    A constant column divides by zero and produces NaN/inf values,
    matching the original behavior.
    """
    X = X.copy()  # never mutate the caller's frame
    keys = X.keys().tolist()
    keys.remove("Index")
    keys.remove("Label")
    for key in keys:
        col = X[key]
        # In-place column assignment keeps the column where it was.
        X[key] = (col - col.mean()) / (col.max() - col.min())
    return X


if __name__ == "__main__":
    # Widen pandas display limits so the whole frame prints when inspecting.
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 500)
    data = load_csv("./data/patient_data.csv")
    print(data.info())
    print(data.describe())
    print(data.count())