使用 MNIST 的超分辨率无法正常工作答案

【问题标题】：SuperResolution using MNIST not working properly使用 MNIST 的超分辨率无法正常工作
【发布时间】：2021-11-20 12:11:11
【问题描述】：

我是深度学习的新手，我构建了一个模型，目标是把 14x14 的图像放大到 28x28。作为第一次尝试，我使用 MNIST 数据集来训练这个网络。

为了制作模型结构，我遵循了这篇论文：https://arxiv.org/pdf/1608.00367.pdf

import numpy as np
from tensorflow.keras import optimizers
from tensorflow.keras import layers
from tensorflow.keras import models
import os
import cv2
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras import initializers
import matplotlib.pyplot as plt
import pickle
import time

# TensorBoard setup:
# Each run logs into its own folder, named with the start time in whole
# seconds, so a new try does not overwrite the curves of an earlier one.
_run_stamp = int(time.time())
NAME = f"MNIST_FSRCNN_test -{_run_stamp}"
tensorboard = TensorBoard(log_dir=f"logs/{NAME}")

# Locations of the data on disk: the "small" folders hold the images that are
# read as 14x14 network inputs, the "normal" folders the 28x28 targets.
_MNIST_ROOT = "D:/MNIST"

train_small_path = _MNIST_ROOT + "/training/small_train"
train_normal_path = _MNIST_ROOT + "/training/normal_train"

test_small_path = _MNIST_ROOT + "/testing/small_test"
test_normal_path = _MNIST_ROOT + "/testing/normal_test"

# Image reading from the directories. MNIST is in grayscale so we read it that way.
def _load_grayscale_dir(directory, side, label):
    """Load every image in *directory* as a (N, side, side, 1) float32 array.

    File names are visited in sorted order so that two directories holding
    identically named files (the small and the normal version of a digit)
    produce arrays whose rows correspond; ``os.listdir`` on its own returns
    entries in arbitrary order.

    ``cv2.imread`` reports an unreadable file by returning ``None`` rather
    than raising, so that case is tested explicitly. Unreadable files and
    files that are not side x side pixels are reported and skipped.

    Pixel values are scaled from 0-255 down to [0, 1] so that the training
    data is on the same scale as the demo input, which is divided by 255
    before it is passed to ``model.predict``.

    Args:
        directory: folder containing the image files.
        side: expected width and height of every image, in pixels.
        label: short name of the data split, used in warning messages.

    Returns:
        float32 array of shape (number_of_loaded_images, side, side, 1).
    """
    images = []
    for file_name in sorted(os.listdir(directory)):
        image = cv2.imread(os.path.join(directory, file_name), cv2.IMREAD_GRAYSCALE)
        if image is None or image.shape != (side, side):
            print("problem with image reading in {}: {}".format(label, file_name))
            continue
        images.append(image)
    # -1 lets numpy infer the sample count instead of hard-coding 60000/10000.
    stacked = np.asarray(images, dtype="float32").reshape((-1, side, side, 1))
    return stacked / 255.0


train_small_array = _load_grayscale_dir(train_small_path, 14, "train small")
train_normal_array = _load_grayscale_dir(train_normal_path, 28, "train normal")

test_small_array = _load_grayscale_dir(test_small_path, 14, "test small")
test_normal_array = _load_grayscale_dir(test_normal_path, 28, "test normal")

# Every small image needs its normal counterpart; a count mismatch means a
# file was skipped or is missing, and the input/target rows no longer line up.
if len(train_small_array) != len(train_normal_array):
    raise ValueError("train small/normal image counts differ: {} vs {}".format(
        len(train_small_array), len(train_normal_array)))
if len(test_small_array) != len(test_normal_array):
    raise ValueError("test small/normal image counts differ: {} vs {}".format(
        len(test_small_array), len(test_normal_array)))




# Group the (input, target) arrays of each split into a single object.
training_data = []
training_data.append([train_small_array, train_normal_array])

testing_data = []
testing_data.append([test_small_array, test_normal_array])


# ---SAVE DATA--
# We are saving our data
# NOTE(review): the previous code dumped an undefined name `y`, which raises
# NameError. Storing the training pair is assumed to be the intent - confirm.
# `with` closes the file even if dumping fails.
with open("X.pickle", "wb") as pickle_out:
    pickle.dump(training_data, pickle_out)
# for reading it:
# Only unpickle files you created yourself: loading a pickle can execute
# arbitrary code.
with open("X.pickle", "rb") as pickle_in:
    X = pickle.load(pickle_in)
# -----------
# -----------


# MAKING THE NETWORK
d = 56  # filters in the feature-extraction and expanding layers
s = 12  # filters in the shrinking and mapping layers
m = 4  # number of 3x3 mapping layers
upscaling = 2  # stride of the transposed convolution = upscaling factor

model = models.Sequential()
bias = True

# Feature extraction:
# input_shape=(None, None, 1): single-channel images of any height/width.
model.add(layers.Conv2D(filters=d,
                        kernel_size=5,
                        padding='same',
                        data_format="channels_last",
                        use_bias=bias,
                        kernel_initializer=initializers.he_normal(),
                        input_shape=(None, None, 1),
                        activation='relu'))

# Shrinking:
model.add(layers.Conv2D(filters=s,
                        kernel_size=1,
                        padding='same',
                        use_bias=bias,
                        kernel_initializer=initializers.he_normal(),
                        activation='relu'))

# Mapping:
for _ in range(m):
    model.add(layers.Conv2D(filters=s,
                            kernel_size=3,
                            padding="same",
                            use_bias=bias,
                            kernel_initializer=initializers.he_normal(),
                            activation='relu'),
              )

# Expanding
# The initializer is instantiated with () like in every other layer; passing
# the bare `initializers.he_normal` relies on Keras resolving the callable,
# which is not reliable across versions.
model.add(layers.Conv2D(filters=d,
                        kernel_size=1,
                        padding='same',
                        use_bias=bias,
                        kernel_initializer=initializers.he_normal(),
                        activation='relu'))

# Deconvolution
# NOTE(review): the ReLU on this output layer clips predictions at zero and,
# with the very small initial weights, may stop passing gradients; the
# referenced FSRCNN design appears to use no activation here - confirm.
model.add(layers.Conv2DTranspose(filters=1,
                                 kernel_size=9,
                                 strides=(upscaling, upscaling),
                                 padding='same',
                                 use_bias=bias,
                                 kernel_initializer=initializers.random_normal(mean=0.0, stddev=0.001),
                                 activation='relu'))

# MODEL COMPILATION
# Mean squared error between predicted and target images, minimised with
# RMSprop at a learning rate of 1e-3.
rmsprop = optimizers.RMSprop(learning_rate=1e-3)
model.compile(optimizer=rmsprop, loss='mse', metrics=['acc'])

# TRAINING
# 20% of the training arrays is held out for validation; progress is written
# to TensorBoard through the callback.
fit_options = dict(
    epochs=10,
    batch_size=1500,
    validation_split=0.2,
    callbacks=[tensorboard],
)
model.fit(x=train_small_array, y=train_normal_array, **fit_options)

# EVALUATION on the test arrays.
test_scores = model.evaluate(test_small_array, test_normal_array)
print(test_scores)



# -DEMO-----------------------------------------------------------------
# Upscale one hand-picked image with the trained model, showing first the
# 14x14 input and then the network output.
from PIL import Image
import PIL.ImageOps
import os

# Named demo_dir rather than `dir` so the builtin dir() is not shadowed.
demo_dir = 'C:/Users/marcc/OneDrive/Escritorio'
os.chdir(demo_dir)  # relative paths below are resolved against this folder

myImage = Image.open("ImageTest.PNG").convert('L')  # convert to single-channel grayscale
myImage = myImage.resize((14, 14))


myImage_array = np.array(myImage)

plt.imshow(myImage_array, cmap=plt.cm.binary)
plt.show()

# Scale to [0, 1] and add batch and channel axes. The scaling applied here
# has to match the scaling of the inputs the model was trained on.
myImage_array = myImage_array.astype('float32') / 255
myImage_array = myImage_array.reshape(1, 14, 14, 1)



newImage = model.predict(myImage_array)
# Drop the batch and channel axes by indexing, so the spatial size of the
# prediction does not have to be hard-coded.
newImage = newImage[0, :, :, 0]
plt.imshow(newImage, cmap=plt.cm.binary)
plt.show()

我遇到的问题是 10 个 epoch 似乎可以工作，它会转换此图像：14x14 MNIST

转换成这个： 10 epochs 28x28

但是当我进行 20 个 epoch 时，我得到20 epochs 28x28

我想知道这是怎么回事。起初我以为模型可能过拟合了，但是当我检查训练和验证的损失函数时，它似乎并没有过拟合： training and validation loss

【问题讨论】：

标签： python tensorflow machine-learning keras deep-learning

【解决方案1】：

我使用了您的代码并尝试重现该错误，但对我来说效果很好。我加载了 mnist 图像并使用skimage.transform.resize 将它们调整为(14, 14)。 200 个 epoch 的训练给出了：

Epoch 1/200 32/32 - 6s 91ms/step - loss: 4380.9126 - acc: 0.1659 - val_loss: 3406.4109 - val_acc: 0.3661
Epoch 2/200 32/32 - 3s 80ms/step - loss: 2827.0591 - acc: 0.5598 - val_loss: 2255.1472 - val_acc: 0.6366
...
Epoch 199/200 32/32 - 3s 86ms/step - loss: 149.0597 - acc: 0.8035 - val_loss: 191.1202 - val_acc: 0.8072
Epoch 200/200 32/32 - 3s 85ms/step - loss: 145.8007 - acc: 0.8035 - val_loss: 207.3333 - val_acc: 0.8072

val_loss 在各个 epoch 之间会有波动，但总体上呈下降趋势。
部分结果：

这是绘制图形的代码：

     def plot_images(num_img):
         """Show test image ``num_img`` in a 2x2 grid of four versions.

         The panels are: the original test image, that image resized to
         14x14, the model's prediction for the 14x14 image, and the 14x14
         image resized back to 28x28 with ``resize`` for comparison.
         """
         fig, axs = plt.subplots(2, 2)

         original = test_normal_array[num_img, :, :, 0]
         shrunk = resize(original, anti_aliasing=True, output_shape=(14, 14))
         # Add batch and channel axes for predict(), then strip them again.
         predicted = model.predict(shrunk[np.newaxis, :, :, np.newaxis])[0, :, :, 0]
         interpolated = resize(shrunk, output_shape=(28, 28), anti_aliasing=True)

         panels = (
             ((0, 0), 'input normal image', original),
             ((1, 0), 'small img', shrunk),
             ((0, 1), 'super resolution', predicted),
             ((1, 1), 'small resized', interpolated),
         )
         for (row, col), title, image in panels:
             axs[row, col].set(title=title)
             axs[row, col].imshow(image, cmap=plt.cm.binary)
         plt.show()

    # Plot the first test sample whose label is 8, then the first labelled 4.
    for index in (8, 4):
        plot_images(np.argwhere(y_test==index)[0][0])

此外，这也是我构建数据集的方式：

    def _downscale_to_14(images):
        """Return a (N, 14, 14, 1) array holding each image of *images* resized to 14x14."""
        count = images.shape[0]
        small = np.zeros((count, 14, 14, 1))
        for i in tqdm.tqdm(range(count)):
            small[i, :, :] = resize(images[i], (14, 14), anti_aliasing=True)
        return small

    # Load MNIST and add a trailing channel axis to the images.
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    train_normal_array = np.expand_dims(x_train, axis=3)
    test_normal_array = np.expand_dims(x_test, axis=3)

    # Network inputs: the same images resized to 14x14.
    train_small_array = _downscale_to_14(train_normal_array)
    test_small_array = _downscale_to_14(test_normal_array)

    # Pair inputs with targets; only the normal-size targets are divided by 255.
    training_data = [
        [train_small_array.astype('float32'), train_normal_array.astype('float32') / 255],
    ]

    testing_data = [
        [test_small_array.astype('float32'), test_normal_array.astype('float32') / 255],
    ]

请注意，我没有把 train_small_array 和 test_small_array 除以 255，因为 resize 函数已经完成了这一步（它会把像素值转换到 [0, 1] 区间）。

【讨论】：

非常感谢您，我的测试可能有问题。您可以发布您使用的代码吗？