有一些工作正在扩展 ImageDataGenerator 以使其更灵活地处理这些类型的情况(参见this issue on Github 中的示例)。
此外,正如 Mikael Rousson 在 cmets 中所提到的,您可以轻松地自己创建自己的 ImageDataGenerator 版本,同时利用它的许多内置函数来简化它。这是我用于图像去噪问题的示例代码,其中我使用随机裁剪 + 加性噪声来动态生成干净和嘈杂的图像对。您可以轻松地对其进行修改以添加其他类型的增强。之后,您可以使用Model.fit_generator 使用这些方法进行训练。
from keras.preprocessing.image import load_img, img_to_array, list_pictures
def random_crop(image, crop_size):
height, width = image.shape[1:]
dy, dx = crop_size
if width < dx or height < dy:
return None
x = np.random.randint(0, width - dx + 1)
y = np.random.randint(0, height - dy + 1)
return image[:, y:(y+dy), x:(x+dx)]
def image_generator(list_of_files, crop_size, to_grayscale=True, scale=1, shift=0):
while True:
filename = np.random.choice(list_of_files)
try:
img = img_to_array(load_img(filename, to_grayscale))
except:
return
cropped_img = random_crop(img, crop_size)
if cropped_img is None:
continue
yield scale * cropped_img - shift
def corrupted_training_pair(images, sigma):
for img in images:
target = img
if sigma > 0:
source = img + np.random.normal(0, sigma, img.shape)/255.0
else:
source = img
yield (source, target)
def group_by_batch(dataset, batch_size):
while True:
try:
sources, targets = zip(*[next(dataset) for i in xrange(batch_size)])
batch = (np.stack(sources), np.stack(targets))
yield batch
except:
return
def load_dataset(directory, crop_size, sigma, batch_size):
files = list_pictures(directory)
generator = image_generator(files, crop_size, scale=1/255.0, shift=0.5)
generator = corrupted_training_pair(generator, sigma)
generator = group_by_batch(generator, batch_size)
return generator
然后你可以像这样使用上面的:
train_set = load_dataset('images/train', (patch_height, patch_width), noise_sigma, batch_size)
val_set = load_dataset('images/val', (patch_height, patch_width), noise_sigma, batch_size)
model.fit_generator(train_set, samples_per_epoch=batch_size * 1000, nb_epoch=nb_epoch, validation_data=val_set, nb_val_samples=1000)