这些列表必须采用什么格式才能被 Keras Tuners 搜索功能接受？答案

【问题标题】：What format do these lists have to be in to be accepted by Keras Tuners Search function?这些列表必须采用什么格式才能被 Keras Tuners 搜索功能接受？
【发布时间】：2020-10-28 15:57:58
【问题描述】：

此代码读入一组测试和训练吉他 jpg 图像，供神经网络学习和测试。

 import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import random

DATADIR = "C:/Users/TheKid/Data/DataMiningProject/DataSet"
CATEGORIES = ["Fender_Jazzmaster", "Gibson_ES"]
CATEGORIES2 = ["Test"]

for category in CATEGORIES:
    path = os.path.join(DATADIR,category)
    for img in os.listdir(path):
        img_array = cv2.imread(os.path.join(path,img),cv2.IMREAD_GRAYSCALE)
    
IMG_SIZE = 70

new_array = cv2.resize(img_array,(IMG_SIZE,IMG_SIZE))

training_data = []
def create_training_data():
        for category in CATEGORIES:
            path = os.path.join(DATADIR,category)
            class_num = CATEGORIES.index(category)
            for img in os.listdir(path):
                img_array = cv2.imread(os.path.join(path,img),cv2.IMREAD_GRAYSCALE)
                new_array = cv2.resize(img_array,(IMG_SIZE,IMG_SIZE))
                training_data.append([new_array,class_num])

create_training_data()

print(len(training_data))
random.shuffle(training_data)

X = []
y = []

for features, label in training_data:
    X.append(features)
    y.append(label)

X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1)              

for category in CATEGORIES2:
    path2 = os.path.join(DATADIR,category)
    for img in os.listdir(path2):
        img_array2 = cv2.imread(os.path.join(path2,img),cv2.IMREAD_GRAYSCALE)
    
IMG_SIZE = 70

new_array2 = cv2.resize(img_array,(IMG_SIZE,IMG_SIZE))

testing_data = []
def create_testing_data():
        for category in CATEGORIES2:
            path2 = os.path.join(DATADIR,category)
            class_num2 = CATEGORIES2.index(category)
            for img in os.listdir(path2):
                img_array2 = cv2.imread(os.path.join(path2,img),cv2.IMREAD_GRAYSCALE)
                new_array2 = cv2.resize(img_array2,(IMG_SIZE,IMG_SIZE))
                testing_data.append([new_array2,class_num2])                
                
            

create_testing_data()

print(len(testing_data))
random.shuffle(testing_data)

X2 = []
y2 = []

for features, label in testing_data:
    X2.append(features)
    y2.append(label)

X2 = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1) 

import pickle

pickle_out = open("X.pickle" , "wb")
pickle.dump(X, pickle_out)
pickle_out.close()

pickle_out = open("y.pickle" , "wb")
pickle.dump(y, pickle_out)
pickle_out.close()

pickle_in = open("X.pickle", "rb")
X = pickle.load(pickle_in)

pickle_out = open("X2.pickle" , "wb")
pickle.dump(X2, pickle_out)
pickle_out.close()

pickle_out = open("y2.pickle" , "wb")
pickle.dump(y2, pickle_out)
pickle_out.close()

pickle_in = open("X2.pickle", "rb")
X = pickle.load(pickle_in)

下一段代码采用之前代码中保存的 pickle 文件，并应该使用 Keras 调谐器搜索功能来运行具有不同数量的卷积层、层大小等的神经网络的不同变体，因此我可以选择最多高效的版本。但是运行时会抛出这个错误：

 ValueError: Data cardinality is ambiguous:
  x sizes: 1312
  y sizes: 12
Please provide data which shares the same first dimension.

所有变量的形状是：

(x_train = (1312, 70, 70, 1)

y_train =(1312,)

x_test = (1312, 70, 70, 1)

y_test =(12,)

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import TensorBoard
import numpy as np
import time
import pickle
import matplotlib.pyplot as plt
from tensorflow import keras
from kerastuner.tuners import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters




pickle_in = open("X.pickle","rb")
x_train = pickle.load(pickle_in)

pickle_in = open("y.pickle","rb")
y_train = pickle.load(pickle_in)

pickle_in = open("X2.pickle","rb")
x_test = pickle.load(pickle_in)

pickle_in = open("y2.pickle","rb")
y_test = pickle.load(pickle_in)



x_train=np.array(x_train/255.0)
y_train=np.array(y_train)

x_test=np.array(x_test/255.0)
y_test=np.array(y_test)


LOG_DIR = f"{int(time.time())}"

def build_model(hp):
    model = keras.models.Sequential()

    model.add(Conv2D(hp.Int("input_units",32, 256, 32 ), (3, 3), input_shape=x_train.shape[1:]))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    for i in range(hp.Int("n_layers", 1, 4)):
        model.add(Conv2D(hp.Int(f"conv-{i}_units",32, 256, 32 ), (3, 3)))
        model.add(Activation('relu'))
    

    model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors

    model.add(Dense(10))
    model.add(Activation("softmax"))

    model.compile(optimizer="adam",
                  loss="binary_crossentropy",
                  metrics=["accuracy"])

    return model

tuner = RandomSearch(
        build_model,
        objective = "val_accuracy",
        max_trials = 1,
        executions_per_trial = 1,
        directory = LOG_DIR)



tuner.search(x=x_train,
            y=y_train,
            epochs=1,
            batch_size=64,
            validation_data=(x_test,y_test))

with open(f"tuner_{int(time.time())}.pkl", "wb") as f:
    pickle.dump(tuner, f)

tuner = pickle.load(open(""))

print(tuner.get_best_hyperparameters()[0].values)

我将如何解决此错误？对我来说，这似乎是一个矩阵格式问题，但我在处理此类问题方面经验不足。

【问题讨论】：

您遇到的错误是什么？请分享日志
我在上面第二段代码的描述中提到了错误：ValueError: Data cardinality is ambiguous: x sizes: 1312 y sizes: 12 Please provide data which share the same first dimension.
好的。我对问题进行了格式化以提高可读性。更改将在社区成员批准后反映。

标签： tensorflow keras syntax conv-neural-network keras-tuner

【解决方案1】：

正如数据的错误消息和形状（x_test 和 y_test）清楚地表明，x_test 中有 1312 行，y_test 中有 12 行。您正在将此数据提供给validation_data=(x_test,y_test)。

请在validation_data 中为x_test 和y_test 传递相同维度或相同行的数据，这应该可以解决您的错误。

【讨论】：