【发布时间】:2020-08-12 15:49:42
【问题描述】:
我是 CNN 新手,我的二元分类模型的验证准确率始终只有 50% 左右,因此遇到了问题。
说明一下,我正在尝试将脑部 MRI 扫描图像分类为阿尔茨海默病(AD)和健康对照(CN)两类。
默认情况下,图像为 250x250 像素,黑白 .png 格式,我总共有大约 1,000 张图像。
我尝试创建自己的模型并实施 VGG16,所有这些都产生了大约 50% 的验证准确度。
我开始认为这可能是数据处理不正确的问题,但由于我是新手,所以我不确定。也许与黑白图像被解释为RGB有关?但老实说,我不确定。
希望有人能看一下,谢谢!
import tensorflow as tf
from keras.layers.convolutional import Convolution2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Flatten, BatchNormalization, Conv2D, MaxPool2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import warnings
import matplotlib.pyplot as plt
from tensorflow.python.keras.applications.vgg16 import VGG16
from tensorflow.python.keras.layers import ZeroPadding2D, MaxPooling2D
# Suppress FutureWarning messages for the rest of the script.
warnings.simplefilter(action='ignore', category=FutureWarning)
os.chdir('C:/Users/dancu/PycharmProjects/firstCNN/data/ad-vs-cn')
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(physical_devices))
# Only enable memory growth when a GPU was actually found: indexing [0] on an
# empty device list raised IndexError on CPU-only machines.
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
# Define paths for image data
# Forward slashes are used throughout (matching the os.chdir call): the
# previous "firstCNN\data" spelling contained the invalid escape sequence
# "\d", which newer Python versions warn about.
train_path = "C:/Users/dancu/PycharmProjects/firstCNN/data/ad-vs-cn/train"
test_path = "C:/Users/dancu/PycharmProjects/firstCNN/data/ad-vs-cn/test"
valid_path = "C:/Users/dancu/PycharmProjects/firstCNN/data/ad-vs-cn/valid"
# Use ImageDataGenerator to create 3 lots of batches
# Every generator rescales pixel values by 1/255, loads images as RGB and
# resizes them to 64x64. Class index 0 is 'cn' and index 1 is 'ad'.
train_batches = ImageDataGenerator(rescale=1/255).flow_from_directory(
    directory=train_path,
    target_size=(64, 64),
    classes=['cn', 'ad'],
    batch_size=20,
    color_mode="rgb",
)
valid_batches = ImageDataGenerator(rescale=1/255).flow_from_directory(
    directory=valid_path,
    target_size=(64, 64),
    classes=['cn', 'ad'],
    batch_size=20,
    color_mode="rgb",
)
# The test images must be resized to the same 64x64 shape the model is built
# for (input_shape=(64,64,3)); the earlier (256,240) target size could not be
# fed to that network.
test_batches = ImageDataGenerator(rescale=1/255).flow_from_directory(
    directory=test_path,
    target_size=(64, 64),
    classes=['cn', 'ad'],
    batch_size=10,
    color_mode="rgb",
)
# Pull one batch from the training generator for a quick sanity check.
first_batch = next(train_batches)
imgs, labels = first_batch
# Test to see normalisation has occurred properly:
# print one pixel row (row 16) of the second image in the batch.
print(imgs[1][16])
# Labels of the same batch, as produced by the generator.
print(labels)
# Define method to plot MRIs
def plotImages(images_arr):
    """Show up to ten images from *images_arr* side by side in a single row.

    The figure has exactly ten panels, so any images beyond the tenth are
    ignored. The figure is displayed with ``plt.show()``; nothing is returned.
    """
    fig, axes = plt.subplots(1, 10, figsize=(20, 20))
    axes = axes.flatten()
    for img, ax in zip(images_arr, axes):
        ax.imshow(img)
    # Hide the axis frame on every panel, including any left empty when fewer
    # than ten images are supplied.
    for ax in axes:
        ax.axis('off')
    plt.tight_layout()
    plt.show()


# Plot a sample of MRIs
plotImages(imgs)
# Define the model
# VGG16 layout: each entry is (filters, number of 3x3 conv layers) for one
# convolutional block; every block is followed by a 2x2 max-pool, stride 2.
_vgg_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

model = Sequential()
_is_first_layer = True
for _n_filters, _n_convs in _vgg_blocks:
    for _ in range(_n_convs):
        if _is_first_layer:
            # Only the very first layer declares the 64x64 RGB input shape.
            model.add(Conv2D(input_shape=(64,64,3), filters=_n_filters,
                             kernel_size=(3,3), padding="same", activation="relu"))
            _is_first_layer = False
        else:
            model.add(Conv2D(filters=_n_filters, kernel_size=(3,3),
                             padding="same", activation="relu"))
    model.add(MaxPool2D(pool_size=(2,2), strides=(2,2)))

# Classifier head: two 4096-unit ReLU layers, then a 2-way softmax output.
model.add(Flatten())
for _width in (4096, 4096):
    model.add(Dense(units=_width, activation="relu"))
model.add(Dense(units=2, activation="softmax"))
# This model hits around 70% train acc, 50% val acc
# model = Sequential([
# Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding = 'same', input_shape=(64,64,3)),
# MaxPool2D(pool_size=(2, 2), strides=2),
# Dropout(0.2),
# # BatchNormalization(),
# Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'),
# MaxPool2D(pool_size=(2, 2), strides=2),
# Dropout(0.3),
# # BatchNormalization(),
# Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'),
# MaxPool2D(pool_size=(2, 2), strides=2),
# Dropout(0.4),
# # BatchNormalization(),
# Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'),
# MaxPool2D(pool_size=(2, 2), strides=2),
# Dropout(0.4),
# Flatten(),
# Dense(units=2, activation='softmax')
# ])
## This model hits around 68% training accuracy at its peak
# base_model = Sequential([
# Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding = 'same', input_shape=(256,256,3)),
# MaxPool2D(pool_size=(2, 2), strides=2),
# Dropout(0.1),
# Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'),
# MaxPool2D(pool_size=(2, 2), strides=2),
# Dropout(0.2),
# Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'),
# MaxPool2D(pool_size=(2, 2), strides=2),
# Dropout(0.3),
# Flatten(),
# Dense(units=2, activation='softmax')
# ])
# Summarise each layer of the model
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()
# Compile and train the model
# categorical_crossentropy pairs with the 2-unit softmax output layer.
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
# One epoch consumes every batch of the training generator; validation runs
# over every batch of the validation generator.
model.fit(
    x=train_batches,
    steps_per_epoch=len(train_batches),
    validation_data=valid_batches,
    validation_steps=len(valid_batches),
    epochs=35,
    verbose=1,
)
编辑:到目前为止,感谢大家的回复,他们都非常有见地。 我认为我们得出的结论是,这是一个小样本的情况,而且这些图像很难使用 2D CNN 进行分类。明天我将尝试使用原始 .nii 文件组合一个基本的 3D CNN,看看这是否会提高准确性。
【问题讨论】:
-
由于 1000 张图像的数据量并不多,你可能想看看图像数据增强。你还可以在优化器上尝试不同的动量或学习率,或者减少/增加 epoch 数,因为在 1000 张图像上训练 35 个 epoch 可能会出现过拟合问题。
标签: python tensorflow machine-learning keras conv-neural-network