【问题标题】:How to compute the average values in classification report for all folds in a K-fold Cross-validation?如何计算 K 折交叉验证中所有折的分类报告中的平均值?
【发布时间】:2020-08-03 04:37:31
【问题描述】:

我的目标是在 K-fold 交叉验证中获得所有折叠的分类报告。

我已经根据这篇帖子中提出的解决方案调整了我的脚本。这是我的代码:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
from sklearn.metrics import classification_report, accuracy_score, make_scorer
from sklearn.model_selection._validation import cross_val_score
from sklearn.model_selection import KFold
from scipy.stats import sem


# Input file: each line is parsed below as "<label> <key>:<value> <key>:<value> ...".
r_filenameTSV = 'TSV/A19784.tsv'

# Read every line into a single "vector" column (read_csv already returns a
# DataFrame, so no extra wrapping is needed).
tsv_read = pd.read_csv(r_filenameTSV, sep='\t', names=["vector"])

# Split each line once, at the first space, into the label and the remaining
# feature string. `n` is passed by keyword because recent pandas releases no
# longer accept it positionally.
df = pd.DataFrame(tsv_read.vector.str.split(' ', n=1).tolist(),
                  columns=['label', 'vector'])

print(df)

# Labels as a flat 1-D integer array.
y = df['label'].astype(int).to_numpy()
print(y.shape)

# Every whitespace-separated "key:value" token becomes one column. The parsed
# values are strings, so convert them to float once and keep the result;
# the estimator then receives numeric data instead of text.
X = pd.DataFrame(
    [dict(item.split(':') for item in row.split()) for row in df['vector']]
).astype(float)
print(X.to_numpy())
print(X)

# Hold-out split kept from the original script; nothing in this block uses it.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# RBF-kernel support vector classifier evaluated further down.
clf = svm.SVC(kernel='rbf',
              C=100,
              gamma=0.001,
              )

print ("K-Folds scores:")

def classification_report_with_accuracy_score(y_true, y_pred):
    """Print the classification report for the given labels and return the accuracy.

    Wrapped with ``make_scorer`` below, so it is called with the true and
    predicted labels of each evaluation and its return value becomes the score.
    """
    report = classification_report(y_true, y_pred)
    print (report)
    fold_accuracy = accuracy_score(y_true, y_pred)
    return fold_accuracy


# 10-fold cross-validation scored with the reporting function above.
fold_scorer = make_scorer(classification_report_with_accuracy_score)
scores = cross_val_score(clf, X, y, cv=10, scoring=fold_scorer)
print (scores)
 

我有这样的每个折叠的分类报告:

       precision    recall  f1-score   support

           0       1.00      0.48      0.65       702
           1       0.78      1.00      0.88      1276

    accuracy                           0.82      1978
   macro avg       0.89      0.74      0.76      1978
weighted avg       0.86      0.82      0.80      1978

要得到一份针对全部 10 折整体计算的同类报告,我需要修改什么?

问候

【问题讨论】:

    标签: python machine-learning scikit-learn


    【解决方案1】:

    也许对其他人有用。下面是我解决这个问题的方法:

    import pandas as pd
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn import svm
    from sklearn import metrics
    from sklearn.metrics import classification_report, accuracy_score, make_scorer
    from sklearn.model_selection._validation import cross_val_score
    from sklearn.model_selection import GridSearchCV, KFold, StratifiedKFold
    from scipy.stats import sem
    
    
    # Input file: each line is parsed below as "<label> <key>:<value> <key>:<value> ...".
    r_filenameTSV = 'TSV/A19784.tsv'

    # Read every line into a single "vector" column (read_csv already returns a
    # DataFrame, so no extra wrapping is needed).
    tsv_read = pd.read_csv(r_filenameTSV, sep='\t', names=["vector"])

    # Split each line once, at the first space, into the label and the remaining
    # feature string. `n` is passed by keyword because recent pandas releases no
    # longer accept it positionally.
    df = pd.DataFrame(tsv_read.vector.str.split(' ', n=1).tolist(),
                      columns=['label', 'vector'])

    print(df)

    # Labels as a flat 1-D integer array.
    y = df['label'].astype(int).to_numpy()
    print(y.shape)

    # Every whitespace-separated "key:value" token becomes one column. The parsed
    # values are strings, so convert them to float once and keep the result;
    # the estimator then receives numeric data instead of text.
    X = pd.DataFrame(
        [dict(item.split(':') for item in row.split()) for row in df['vector']]
    ).astype(float)
    print(X.to_numpy())
    print(X)

    # Hold-out split kept from the original script; nothing in this block uses it.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    # RBF-kernel support vector classifier evaluated further down.
    clf = svm.SVC(kernel='rbf',
                  C=3,
                  gamma=3,
                  )

    print ("K-Folds scores:")
    
    # True and predicted labels accumulated across every scorer call, so that a
    # single report can be built once cross-validation has finished.
    originalclass = []
    predictedclass = []

    def classification_report_with_accuracy_score(y_true, y_pred):
        """Record the given labels and predictions, then return the accuracy.

        Unlike a scorer that prints one report per call, this one only appends
        to the module-level ``originalclass`` / ``predictedclass`` lists; the
        report is printed once, after cross-validation.
        """
        # NOTE(review): this relies on the scorer running in the same process
        # as these lists; presumably it breaks with n_jobs > 1 -- confirm.
        originalclass.extend(y_true)
        predictedclass.extend(y_pred)
        return accuracy_score(y_true, y_pred)

    # Stratified 10-fold splitter.
    outer_cv = StratifiedKFold(n_splits=10)

    # Plain cross-validation over outer_cv (no nested loop and no parameter
    # search); it is run here for the scorer's side effect of filling the two
    # lists above. nested_score holds the values returned by the scorer.
    nested_score = cross_val_score(
        clf, X=X, y=y, cv=outer_cv,
        scoring=make_scorer(classification_report_with_accuracy_score))

    # One report computed over the pooled labels and predictions collected by
    # the scorer (pooled counts, not an arithmetic mean of per-fold reports).
    print(classification_report(originalclass, predictedclass))
    

    结果

    K-Folds scores:
                  precision    recall  f1-score   support
    
               0       0.83      0.81      0.82      7023
               1       0.90      0.90      0.90     12761
    
        accuracy                           0.87     19784
       macro avg       0.86      0.86      0.86     19784
    weighted avg       0.87      0.87      0.87     19784
    

    【讨论】:

      猜你喜欢
      • 2019-12-13
      • 2015-12-13
      • 2021-12-26
      • 2016-01-15
      • 2018-08-29
      • 2017-06-09
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多