【发布时间】:2021-12-20 19:08:00
【问题描述】:
我目前正在尝试为我的神经网络模型制作混淆矩阵,但不断收到此错误:
ValueError: Classification metrics can't handle a mix of binary and continuous targets.
我有一个包含 100 个正例和 100 个负例的肽数据集,标签是 1 和 0。我已将每个肽段转换为 Word2Vec 嵌入,然后放入 ML 模型并进行训练。
这是我的代码:
pos = "/content/drive/MyDrive/pepfun/Training_format_pos (1).txt"
neg = "/content/drive/MyDrive/pepfun/Training_format_neg.txt"
# pos sequences extract into list
f = open(pos, 'r')
file_contents = f.read()
data = file_contents
f.close()
newdatapos = data.splitlines()
print(newdatapos)
# neg sequences extract into list
f2 = open(neg, 'r')
file_contents2 = f2.read()
data2 = file_contents2
f2.close()
newdataneg = data2.splitlines()
print(newdataneg)
!pip install rdkit-pypi
import rdkit
from rdkit import Chem
# set up embeddings
import nltk
from gensim.models import Word2Vec
import multiprocessing
EMB_DIM = 4
# embeddings pos
w2vpos = Word2Vec([newdatapos], size=EMB_DIM, min_count=1)
sequez = "VVYPWTQRF"
w2vpos[sequez].shape
words=list(w2vpos.wv.vocab)
vectors = []
for word in words:
vectors.append(w2vpos[word].tolist())
print(len(vectors))
print(vectors[1])
data = np.array(vectors)
# embeddings neg
w2vneg = Word2Vec([newdataneg], size=EMB_DIM, min_count=1)
sequen = "GIGKFLHSAGKFGKAFLGEVMKS"
w2vneg[sequen].shape
wordsneg = list(w2vneg.wv.vocab)
vectorsneg = []
for word in wordsneg:
vectorsneg.append(w2vneg[word].tolist())
allvectors = vectorsneg + vectors
print(len(allvectors))
arrayvectors = np.array(allvectors)
labels = []
for i in range (100):
labels.append(1)
print(labels)
for i in range (100):
labels.append(0)
print(labels)
print(len(labels))
import seaborn as sns
!pip install keras
import keras
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.utils import shuffle
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
!pip install tensorflow==2.7.0
import tensorflow as tf
from keras import metrics
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Conv3D, Flatten, Dropout
import sklearn
a = sklearn.utils.shuffle(arrayvectors, random_state=1)
b = sklearn.utils.shuffle(labels, random_state=1)
dfa = pd.DataFrame(a, columns=None)
dfb = pd.DataFrame(b, columns=None)
X = dfa.iloc[:]
y = dfb.iloc[:]
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=300)
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)
y_train = y_train.astype(np.float32)
y_test = y_test.astype(np.float32)
# train data & test data tensor conversion
class trainData(Dataset):
def __init__(self, X_data, y_data):
self.X_data = X_data
self.y_data = y_data
def __getitem__(self, index):
return self.X_data[index], self.y_data[index]
def __len__ (self):
return len(self.X_data)
train_data = trainData(torch.FloatTensor(X_train),
torch.FloatTensor(y_train))
## test data
class testData(Dataset):
def __init__(self, X_data):
self.X_data = X_data
def __getitem__(self, index):
return self.X_data[index]
def __len__ (self):
return len(self.X_data)
test_data = testData(torch.FloatTensor(X_test))
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_data, batch_size=1)
# make model
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(4,)))
model.add(Dropout(0.1))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(16, input_dim=1, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(12,activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(1,activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy',optimizer='RMSprop', metrics=['accuracy','AUC'])
history = model.fit(X_train, y_train, epochs=2000,batch_size=64, validation_data = (X_test, y_test), validation_batch_size=64)
from sklearn.metrics import confusion_matrix, classification_report
print(y_pred.round)
print(classification_report(y_test,y_pred))
我已经尝试打印我的 y_pred 值来查看问题。这是我得到的:
[[6.0671896e-01]
[9.9999785e-01]
[1.6576621e-01]
[9.9999899e-01]
[5.6016445e-04]
[2.4935007e-02]
[4.4204036e-11]
[2.8884350e-11]
[6.3217885e-05]
[4.7181606e-02]
[9.9742711e-03]
[1.0780278e-01]
[7.0868194e-01]
[2.0298421e-02]
[9.5819527e-01]
[1.4784497e-01]
[1.7605269e-01]
[9.9643111e-01]
[4.7657710e-01]
[9.9991858e-01]
[4.5830309e-03]
[6.5091753e-01]
[3.8710403e-01]
[2.4756461e-02]
[1.1719930e-01]
[6.4381957e-03]
[7.1598434e-01]
[1.5749395e-02]
[6.8473631e-01]
[9.5499575e-01]
[2.2420317e-02]
[9.9999177e-01]
[6.9633877e-01]
[9.2811453e-01]
[1.8373668e-01]
[2.9298562e-07]
[1.1250973e-03]
[4.3785056e-01]
[9.6832716e-01]
[8.6754566e-01]]
这不是 1 和 0。我相信那里也有问题,但我不确定。
【问题讨论】:
标签: python numpy tensorflow keras bioinformatics