【发布时间】:2018-03-09 15:33:27
【问题描述】:
编辑添加: 我找到了我认为可行的解决方案:https://bleyddyn.github.io/posts/2017/10/keras-lstm/
我正在尝试使用 Conv/LSTM 网络来控制机器人。我想我已经设置好了所有东西,所以我可以开始用回放内存中的批量数据训练它,但我不知道如何实际使用它来控制机器人。简化的测试代码如下。
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten, Input
from keras.layers import Convolution2D
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import TimeDistributed
from keras.utils import to_categorical
def make_model(num_actions, timesteps, input_dim, l2_reg=0.005):
    """Build and compile a time-distributed Conv -> LSTM -> softmax network.

    Args:
        num_actions: Number of units in the final softmax layer.
        timesteps: Fixed number of frames in every input sequence.
        input_dim: Per-frame shape as a tuple, e.g. ``(84, 84, 3)``.
        l2_reg: Accepted for API compatibility; not used by this function.

    Returns:
        A ``Sequential`` model compiled with categorical cross-entropy
        and the Adam optimizer.
    """
    def conv(filters, **wrapper_kwargs):
        # Every conv stage shares the same kernel, stride and activation;
        # only the filter count (and the input shape on the first) differs.
        return TimeDistributed(
            Convolution2D(filters, (3, 3), strides=(2, 2), activation='relu'),
            **wrapper_kwargs
        )

    sequence_shape = (timesteps,) + input_dim
    stack = [
        conv(8, input_shape=sequence_shape),
        conv(16),
        conv(32),
        TimeDistributed(Flatten()),
        LSTM(512, return_sequences=True, activation='relu', unroll=True),
        Dense(num_actions, activation='softmax'),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model
# Sizes for the fake experiment.
batch_size = 16
timesteps = 10
num_actions = 6
frame_shape = (84, 84, 3)  # per-frame shape handed to make_model

model = make_model(num_actions, timesteps, frame_shape)
model.summary()

# Fake training batch. Would be pulled from a replay memory.
batch = np.random.uniform(
    low=0, high=255, size=(batch_size, timesteps) + frame_shape
)
# randint's `high` is exclusive, so use num_actions (not num_actions - 1) to
# cover every action; the label count is derived instead of hard-coding 160.
y = np.random.randint(0, high=num_actions, size=batch_size * timesteps)
y = to_categorical(y, num_classes=num_actions)
y = y.reshape(batch_size, timesteps, num_actions)

# stateful should be false here
# NOTE: train_on_batch returns the training loss, not predictions.
pred = model.train_on_batch(batch, y)

# move trained network to robot
# This works, but it isn't practical to not get outputs (actions) until after
# 10 timesteps and I don't think the LSTM internal state would be correct if
# I tried a rolling queue of input images.
batch = np.random.uniform(low=0, high=255, size=(1, timesteps) + frame_shape)
pred = model.predict(batch, batch_size=1)

# This is what I would need to do on my robot, with the LSTM keeping state
# between calls to predict.
max_time = 10  # or 100000, or forever, etc.
for i in range(max_time):
    # pull one image from camera
    image = np.random.uniform(low=0, high=255, size=(1, 1) + frame_shape)
    # stateful should be true here
    # NOTE: this call is intentionally left as written. The model was built
    # for `timesteps` frames per sample, so feeding a single frame raises a
    # ValueError about the expected input shape (None, 10, 84, 84, 3).
    pred = model.predict(image, batch_size=1)
    # take action based on pred
我在“model.predict(image...”行中得到的错误是:
ValueError: Error when checking : expected time_distributed_1_input to have shape (None, 10, 84, 84, 3) but got array with shape (1, 1, 84, 84, 3)
这是可以理解的,但我找不到解决办法。 我不太了解 Keras,甚至不知道我是否正确使用了 TimeDistributed 层。
那么,这在 Keras 中是否可行?如果有,怎么做?
如果没有,在 TF 或 PyTorch 中是否可以?
感谢您的任何建议!
编辑补充:下面是可以运行的代码,但它不一定正确,仍然需要在 OpenAI Gym 任务上进行测试。
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten, Input
from keras.layers import Convolution2D
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import TimeDistributed
from keras.utils import to_categorical
def make_model(num_actions, timesteps, input_dim, l2_reg=0.005):
    """Build and compile a stateful time-distributed Conv -> LSTM network.

    The batch input shape is ``(1, None) + input_dim``: a batch size of one
    and an unspecified number of frames per call.

    Args:
        num_actions: Number of units in the final softmax layer.
        timesteps: Accepted for API compatibility; not used by this function.
        input_dim: Per-frame shape as a tuple, e.g. ``(84, 84, 3)``.
        l2_reg: Accepted for API compatibility; not used by this function.

    Returns:
        A ``Sequential`` model compiled with categorical cross-entropy
        and the Adam optimizer.
    """
    def conv(filters, **wrapper_kwargs):
        # Every conv stage shares the same kernel, stride and activation;
        # only the filter count (and the input shape on the first) differs.
        return TimeDistributed(
            Convolution2D(filters, (3, 3), strides=(2, 2), activation='relu'),
            **wrapper_kwargs
        )

    batch_shape = (1, None) + input_dim
    stack = [
        conv(8, batch_input_shape=batch_shape),
        conv(16),
        conv(32),
        TimeDistributed(Flatten()),
        LSTM(512, return_sequences=True, activation='relu', stateful=True),
        Dense(num_actions, activation='softmax'),
    ]

    model = Sequential()
    for layer in stack:
        model.add(layer)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model
# Sizes for the fake experiment.
batch_size = 16
timesteps = 10
num_actions = 6
frame_shape = (84, 84, 3)  # per-frame shape handed to make_model

model = make_model(num_actions, 1, frame_shape)
model.summary()

# Fake training batch. Would be pulled from a replay memory.
batch = np.random.uniform(
    low=0, high=255, size=(batch_size, timesteps) + frame_shape
)
# randint's `high` is exclusive, so use num_actions (not num_actions - 1) to
# cover every action; the label count is derived instead of hard-coding 160.
y = np.random.randint(0, high=num_actions, size=batch_size * timesteps)
y = to_categorical(y, num_classes=num_actions)
y = y.reshape(batch_size, timesteps, num_actions)

# Need to find a way to prevent the optimizer from updating every b, but
# accumulate updates over an entire batch (batch_size).
for b in range(batch_size):
    # The model is built with a batch size of 1, so feed one sequence at a
    # time; slicing b:b+1 keeps the leading batch axis.
    # NOTE: train_on_batch returns the training loss, not predictions.
    pred = model.train_on_batch(batch[b:b + 1], y[b:b + 1])
    #for t in range(timesteps):
    #    pred = model.train_on_batch( np.reshape(batch[b,t,:], (1,1,84,84,3)), np.reshape(y[b,t,:], (1,1,num_actions)) )
    model.reset_states()  # Don't carry internal state between batches

# move trained network to robot
# This works, but it isn't practical to not get outputs (actions) until after
# 10 timesteps
#batch = np.random.uniform( low=0, high=255, size=(1,timesteps,84,84,3) )
#pred = model.predict( batch, batch_size=1 )

# This is what I would need to do on my robot, with the LSTM keeping state
# between calls to predict.
max_time = 10  # or 100000, or forever, etc.
for i in range(max_time):
    # pull one image from camera
    image = np.random.uniform(low=0, high=255, size=(1, 1) + frame_shape)
    # stateful should be true here
    pred = model.predict(image, batch_size=1)
    # take action based on pred
    print(pred)
【问题讨论】:
-
您的机器人是在处理图像吗?84 x 84 指的是图像的尺寸吗?
-
是的,84x84x3(宽度、高度、颜色通道)。