【发布时间】:2019-11-12 01:50:18
【问题描述】:
我使用受http://mlexplained.com/2019/02/15/building-an-lstm-from-scratch-in-pytorch-lstms-in-depth-part-1/ 启发的非常定制的 LSTM 单元。
我用它来查看中间门控值。我的问题是,我将如何扩展此类以添加更多层和添加双向性的选项?它应该被包装在一个新的类中还是添加到当前的类中?
class Dim(IntEnum):
batch = 0
seq = 1
class simpleLSTM(nn.Module):
def __init__(self, input_sz: int, hidden_sz: int):
super().__init__()
self.input_size = input_sz
self.hidden_size = hidden_sz
# input gate
self.W_ii = Parameter(torch.Tensor(input_sz, hidden_sz))
self.W_hi = Parameter(torch.Tensor(hidden_sz, hidden_sz))
self.b_i = Parameter(torch.Tensor(hidden_sz))
# forget gate
self.W_if = Parameter(torch.Tensor(input_sz, hidden_sz))
self.W_hf = Parameter(torch.Tensor(hidden_sz, hidden_sz))
self.b_f = Parameter(torch.Tensor(hidden_sz))
# ???
self.W_ig = Parameter(torch.Tensor(input_sz, hidden_sz))
self.W_hg = Parameter(torch.Tensor(hidden_sz, hidden_sz))
self.b_g = Parameter(torch.Tensor(hidden_sz))
# output gate
self.W_io = Parameter(torch.Tensor(input_sz, hidden_sz))
self.W_ho = Parameter(torch.Tensor(hidden_sz, hidden_sz))
self.b_o = Parameter(torch.Tensor(hidden_sz))
self.init_weights()
self.out = nn.Linear(hidden_sz, len(TRG.vocab))
def init_weights(self):
for p in self.parameters():
if p.data.ndimension() >= 2:
nn.init.xavier_uniform_(p.data)
else:
nn.init.zeros_(p.data)
def forward(self, x, init_states=None ):
"""Assumes x is of shape (batch, sequence, feature)"""
seq_sz, bs, = x.size()
hidden_seq = []
prediction = []
if init_states is None:
h_t, c_t = torch.zeros(self.hidden_size).to(x.device), torch.zeros(self.hidden_size).to(x.device)
else:
h_t, c_t = init_states
for t in range(seq_sz): # iterate over the time steps
x_t = x[t, :].float()
#LOOK HERE!!!
i_t = torch.sigmoid(x_t @ self.W_ii + h_t @ self.W_hi + self.b_i)
f_t = torch.sigmoid(x_t @ self.W_if + h_t @ self.W_hf + self.b_f)
g_t = torch.tanh(x_t @ self.W_ig + h_t @ self.W_hg + self.b_g)
o_t = torch.sigmoid(x_t @ self.W_io + h_t @ self.W_ho + self.b_o)
c_t = f_t * c_t + i_t * g_t
h_t = o_t * torch.tanh(c_t)
hidden_seq.append(h_t.unsqueeze(Dim.batch))
pred_t = self.out(h_t.unsqueeze(Dim.batch))
#pred_t = F.softmax(pred_t)
prediction.append(pred_t)
hidden_seq = torch.cat(hidden_seq, dim=Dim.batch)
prediction = torch.cat(prediction, dim=Dim.batch)
# reshape from shape (sequence, batch, feature) to (batch, sequence, feature)
hidden_seq = hidden_seq.transpose(Dim.batch, Dim.seq).contiguous()
prediction = prediction.transpose(Dim.batch, Dim.seq).contiguous()
return prediction, hidden_seq, (h_t, c_t)
我调用它并使用以下示例进行训练。
lstm = simpleLSTM(1, 100)
hidden_size = lstm.hidden_size
optimizer = optim.Adam(lstm.parameters())
h_0, c_0 = (torch.zeros(hidden_size, requires_grad=True),
torch.zeros(hidden_size, requires_grad=True))
grads = []
h_t, c_t = h_0, c_0
N_EPOCHS = 10
for epoch in range(N_EPOCHS):
epoch_loss = 0
for i, batch in enumerate(train):
optimizer.zero_grad()
src, src_len = batch.src
trg = batch.trg
trg = trg.view(-1)
predict, output, hidden_states = lstm(src)
predict = predict.t().unsqueeze(1)
predict= predict.view(-1, predict.shape[-1])
loss = criterion(predict,trg)
loss.backward()
optimizer.step()
epoch_loss += loss.item()
print(epoch_loss)
【问题讨论】:
标签: python python-3.x deep-learning pytorch lstm