【发布时间】:2021-08-02 06:55:59
【问题描述】:
我有三个列表,我想一次修复“random_state”,同时运行其余两个列表,如预期输出所示。我应该编辑哪个部分以获得预期的输出?目的是确定每个欠采样的射频性能。
number = list(range(41,55))
data = [df_AFC,df_AF,df_ESF,df_EXF,df_F,df_GF,df_KRFC,df_KRF,df_MF,df_PF,df_SFC,df_SF]
name = ['AFC','AF','ESF','EXF','F','GF','KRFC','KRF','MF','PF','SFC','SF']
def remove_low_variance(input_data, threshold=0.1):
selection = VarianceThreshold(threshold)
selection.fit(input_data)
return input_data[input_data.columns[selection.get_support(indices=True)]]
result = []
for k, i, j in zip(number,data,name):
# rus = RandomUnderSampler(sampling_strategy="not minority") # String
rus = RandomUnderSampler(sampling_strategy=1, random_state=k) # Numerical value
x = i.drop('class', axis=1)
y = i['class']
x_res, y_res = rus.fit_resample(x, y)
#replace with x_res, y_res from now on
remove_low_variance(x_res, threshold=0.1)
x_train, x_test, y_train, y_test = train_test_split(x_res, y_res, test_size=0.2,
random_state=42)
x_train.shape, x_test.shape
model = RandomForestClassifier(n_estimators=500, random_state=42)
model.fit(x_train, y_train)
y_train_pred = model.predict(x_train)
y_test_pred = model.predict(x_test)
mcc_train = matthews_corrcoef(y_train, y_train_pred)
#print(mcc_train)
mcc_test = matthews_corrcoef(y_test, y_test_pred)
#print(mcc_test)
rf = RandomForestClassifier(n_estimators=500, random_state=42)
cv_scores = cross_val_score(rf, x_train, y_train, cv=5)
#print(cv_scores)
mcc_cv = cv_scores.mean()
#print(mcc_cv)
random_state_balance = pd.Series(k, name='Random_state')
FP_name = pd.Series(j, name='Fingerprint')
model_name = pd.Series(['Random forest'], name='Name')
mcc_train_series = pd.Series(mcc_train, name='MCC_train')
mcc_cv_series = pd.Series(mcc_cv, name='MCC_cv')
mcc_test_series = pd.Series(mcc_test, name='MCC_test')
performance_metrics = pd.concat([random_state_balance,FP_name, model_name,
mcc_train_series, mcc_cv_series,
mcc_test_series], axis=1)
result.append(performance_metrics)
电流输出
预期输出
【问题讨论】:
标签: python list for-loop scikit-learn sampling