过了这么多天,希望您已经找到了问题的解决方案,
但我还是想在这里分享另一个思路,以便像我这样的新人
以后遇到同样的问题时能够得到帮助。
几天前我也遇到了这类问题。正如 user3731622 所说,follow_links 可以解决您的问题。另外,我认为合并两个数据生成器的思路也是可行的。但是,在这种情况下,必须根据各个相关目录中的数据量来确定相应数据生成器的批量大小。
子生成器的批量大小:
$$b = \frac{B \times n}{\sum n}$$
Where,
b = Batch Size Of Any Sub-generator
B = Desired Batch Size Of The Merged Generator
n = Number Of Images In That Directory Of Sub-generator
$\sum n$ (the sum of n) = Total Number Of Images In All Directories
查看下面的代码,这可能会有所帮助:
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import Sequence
import matplotlib.pyplot as plt
import numpy as np
import os
class MergedGenerators(Sequence):
    """Merge several batch generators into a single batch sequence.

    Every merged batch is built by taking one batch from each sub-generator
    and concatenating them in the order the generators were given.

    Args:
        batch_size: Desired number of samples in one merged batch.
        generators: Sub-generators to merge. Each one must support ``len()``
            and integer indexing and expose a ``class_mode`` attribute.
        sub_batch_size: Batch size of each sub-generator, in the same order
            as ``generators``.
    """

    def __init__(self, batch_size, generators=None, sub_batch_size=None):
        # Initialise the base class; newer Keras releases expect Sequence
        # subclasses to do so, and it is a no-op for older ones.
        super().__init__()
        # ``None`` sentinels instead of ``[]`` defaults: a mutable default is
        # created once and would be shared by every instance.
        self.generators = [] if generators is None else generators
        self.sub_batch_size = [] if sub_batch_size is None else sub_batch_size
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of merged batches.

        The sample count is estimated as the sum of
        ``len(generator) * sub_batch_size`` over all sub-generators and then
        divided by ``batch_size``, truncating any fractional batch.
        """
        total_samples = sum(
            len(generator) * size
            for generator, size in zip(self.generators, self.sub_batch_size)
        )
        return int(total_samples / self.batch_size)

    def __getitem__(self, index):
        """Fetch one batch from every sub-generator and pack them together.

        Returns:
            An array of inputs when the first generator has
            ``class_mode is None``; otherwise a tuple ``(inputs, targets)``.
        """
        x_batch = []
        y_batch = []
        for generator in self.generators:
            # Wrap the index so a generator with fewer batches than the
            # merged sequence is cycled instead of running out of range.
            batch = generator[index % len(generator)]
            if generator.class_mode is None:
                x_batch.extend(batch)
            else:
                inputs, targets = batch
                x_batch.extend(inputs)
                y_batch.extend(targets)

        # The return format is decided by the first generator only.
        if self.generators[0].class_mode is None:
            return np.array(x_batch)
        return np.array(x_batch), np.array(y_batch)
def _count_files(directory):
    """Return the number of files found anywhere below ``directory``."""
    return sum(len(files) for _root, _dirs, files in os.walk(directory))


def build_datagenerator(dir1=None, dir2=None, batch_size=32):
    """Build a merged generator that draws images from two directories.

    Each directory gets its own augmentation pipeline. The merged batch is
    split between the two in proportion to the number of files each
    directory holds, so both are consumed at roughly the same rate.

    Args:
        dir1: Root directory read by the first generator.
        dir2: Root directory read by the second generator.
        batch_size: Total number of images in one merged batch.

    Returns:
        A ``MergedGenerators`` instance wrapping both directory iterators.

    Raises:
        ValueError: If ``batch_size`` is smaller than 2, or if no files are
            found under either directory.
    """
    if batch_size < 2:
        raise ValueError(
            "batch_size must be at least 2 so that each of the two "
            "generators receives a non-zero share")

    # NOTE: every file is counted, not only images, so the split is an
    # approximation if the directories contain other files.
    n_images_in_dir1 = _count_files(dir1)
    n_images_in_dir2 = _count_files(dir2)
    total_images = n_images_in_dir1 + n_images_in_dir2
    if total_images == 0:
        raise ValueError(
            "no files found under dir1 or dir2; cannot split the batch")

    # The directories hold different numbers of images, so each generator
    # gets a share of the batch proportional to its directory size.
    proportional_share = int((n_images_in_dir1 * batch_size) / total_images)
    # Clamp so that a heavily imbalanced split never hands either generator
    # a batch size of zero.
    generator1_batch_size = min(max(proportional_share, 1), batch_size - 1)
    generator2_batch_size = batch_size - generator1_batch_size

    augmenter1 = ImageDataGenerator(
        rescale=1. / 255,
        shear_range=0.2,
        zoom_range=0.2,
        rotation_range=5.,
        horizontal_flip=True,
    )

    # The second pipeline deliberately uses different augmentation settings
    # than the first one.
    augmenter2 = ImageDataGenerator(
        rescale=1. / 255,
        zoom_range=0.2,
        horizontal_flip=False,
    )

    # Options shared by both directory iterators.
    flow_options = dict(
        target_size=(128, 128),
        color_mode='rgb',
        class_mode=None,
        shuffle=True,
        seed=42,
        interpolation="bicubic",
    )

    generator1 = augmenter1.flow_from_directory(
        dir1, batch_size=generator1_batch_size, **flow_options)
    generator2 = augmenter2.flow_from_directory(
        dir2, batch_size=generator2_batch_size, **flow_options)

    return MergedGenerators(
        batch_size,
        generators=[generator1, generator2],
        sub_batch_size=[generator1_batch_size, generator2_batch_size])
def test_datagen(batch_size=32):
    """Build the merged generator and plot its first batch as a smoke test.

    Reads images from ``./asdf`` and ``./asdf2``, prints the number of merged
    batches and the shape of the first batch, and draws that batch as a
    single row of images. The figure is created but ``plt.show()`` is not
    called here.

    Args:
        batch_size: Size of the merged batch; also the number of subplot
            columns in the figure.
    """
    datagen = build_datagenerator(dir1="./asdf",
                                  dir2="./asdf2",
                                  batch_size=batch_size)

    print("Datagenerator length (Batch count):", len(datagen))

    # Nothing to plot when the generator yields no batches.
    if len(datagen) == 0:
        return

    # Index the first batch directly instead of iterating and breaking, which
    # would load and augment a second batch only to throw it away.
    image_batch = datagen[0]
    print("Images: ", image_batch.shape)

    plt.figure(figsize=(10, 10))
    for position, image in enumerate(image_batch, start=1):
        plt.subplot(1, batch_size, position)
        plt.imshow(image, interpolation='nearest')
        plt.axis('off')
    # Adjust the layout once, after every subplot has been added.
    plt.tight_layout()
# Run the smoke test with a merged batch size of 4. This executes whenever
# the module is run or imported.
test_datagen(4)