在 pytorch 中向自定义 LSTM 单元添加层和双向性答案

【问题标题】：Adding layers and bidirectionality to custom LSTM cell in pytorch（在 PyTorch 中向自定义 LSTM 单元添加层和双向性）
【发布时间】：2019-11-12 01:50:18
【问题描述】：

我使用的是一个高度定制的 LSTM 单元，其实现受 http://mlexplained.com/2019/02/15/building-an-lstm-from-scratch-in-pytorch-lstms-in-depth-part-1/ 启发。

我用它来查看中间门控值。我的问题是，我将如何扩展此类以添加更多层和添加双向性的选项？它应该被包装在一个新的类中还是添加到当前的类中？

# Integer axis indices used when stacking and transposing tensors:
# ``Dim.batch`` is axis 0 and ``Dim.seq`` is axis 1.
Dim = IntEnum("Dim", [("batch", 0), ("seq", 1)])

class simpleLSTM(nn.Module):
    """Single-layer LSTM spelled out gate by gate, followed by a linear read-out.

    Each gate keeps its own input weight ``W_i*``, recurrent weight ``W_h*``
    and bias ``b_*`` so the individual gate activations are available as
    plain tensors inside :meth:`forward`.

    Args:
        input_sz: number of features in one time step of the input.
        hidden_sz: size of the hidden state and of the cell state.
        output_sz: number of output features of the read-out layer
            ``self.out``. When left as ``None`` it falls back to
            ``len(TRG.vocab)``, which requires a module-level ``TRG``.
    """

    def __init__(self, input_sz: int, hidden_sz: int, output_sz=None):
        super().__init__()
        self.input_size = input_sz
        self.hidden_size = hidden_sz
        # torch.empty only allocates storage; init_weights() sets the values.
        # input gate
        self.W_ii = Parameter(torch.empty(input_sz, hidden_sz))
        self.W_hi = Parameter(torch.empty(hidden_sz, hidden_sz))
        self.b_i = Parameter(torch.empty(hidden_sz))
        # forget gate
        self.W_if = Parameter(torch.empty(input_sz, hidden_sz))
        self.W_hf = Parameter(torch.empty(hidden_sz, hidden_sz))
        self.b_f = Parameter(torch.empty(hidden_sz))
        # candidate cell state (the tanh "g" gate)
        self.W_ig = Parameter(torch.empty(input_sz, hidden_sz))
        self.W_hg = Parameter(torch.empty(hidden_sz, hidden_sz))
        self.b_g = Parameter(torch.empty(hidden_sz))
        # output gate
        self.W_io = Parameter(torch.empty(input_sz, hidden_sz))
        self.W_ho = Parameter(torch.empty(hidden_sz, hidden_sz))
        self.b_o = Parameter(torch.empty(hidden_sz))

        self.init_weights()

        if output_sz is None:
            output_sz = len(TRG.vocab)
        # Registered after init_weights() has run, so the read-out layer keeps
        # nn.Linear's own default initialisation.
        self.out = nn.Linear(hidden_sz, output_sz)

    def init_weights(self):
        """Xavier-initialise every weight matrix and zero every 1-D parameter."""
        for p in self.parameters():
            if p.dim() >= 2:
                nn.init.xavier_uniform_(p)
            else:
                nn.init.zeros_(p)

    def forward(self, x, init_states=None):
        """Run the cell over a sequence, one time step at a time.

        Args:
            x: 2-D tensor of shape ``(seq_len, input_size)``; row ``t`` is the
                feature vector fed to the cell at step ``t``. It is cast to
                float before use.
            init_states: optional ``(h_0, c_0)`` pair. Defaults to two zero
                vectors of length ``hidden_size`` on ``x``'s device.

        Returns:
            ``(prediction, hidden_seq, (h_t, c_t))``. The per-step hidden
            states and read-out outputs are concatenated along axis 0 and then
            axes 0 and 1 are swapped; with the default initial state this
            yields ``hidden_seq`` of shape ``(hidden_size, seq_len)`` and
            ``prediction`` of shape ``(output_sz, seq_len)``. ``(h_t, c_t)``
            are the states after the last step.

        Raises:
            ValueError: if ``x`` is not 2-D.
        """
        if x.dim() != 2:
            raise ValueError(
                f"expected a 2-D (seq_len, input_size) tensor, got shape {tuple(x.shape)}"
            )
        seq_sz = x.size(0)
        x = x.float()  # cast once instead of once per time step
        hidden_seq = []
        prediction = []

        if init_states is None:
            h_t = torch.zeros(self.hidden_size, device=x.device)
            c_t = torch.zeros(self.hidden_size, device=x.device)
        else:
            h_t, c_t = init_states

        for t in range(seq_sz):  # iterate over the time steps
            x_t = x[t, :]
            # The four gates are kept as separate tensors so they can be inspected.
            i_t = torch.sigmoid(x_t @ self.W_ii + h_t @ self.W_hi + self.b_i)
            f_t = torch.sigmoid(x_t @ self.W_if + h_t @ self.W_hf + self.b_f)
            g_t = torch.tanh(x_t @ self.W_ig + h_t @ self.W_hg + self.b_g)
            o_t = torch.sigmoid(x_t @ self.W_io + h_t @ self.W_ho + self.b_o)
            c_t = f_t * c_t + i_t * g_t
            h_t = o_t * torch.tanh(c_t)

            # Add a leading axis so the steps can be concatenated along it.
            hidden_seq.append(h_t.unsqueeze(Dim.batch))
            prediction.append(self.out(h_t.unsqueeze(Dim.batch)))

        hidden_seq = torch.cat(hidden_seq, dim=Dim.batch)
        prediction = torch.cat(prediction, dim=Dim.batch)
        # Swap the first two axes: the step axis moves from position 0 to 1.
        hidden_seq = hidden_seq.transpose(Dim.batch, Dim.seq).contiguous()
        prediction = prediction.transpose(Dim.batch, Dim.seq).contiguous()

        return prediction, hidden_seq, (h_t, c_t)

我调用它并使用以下示例进行训练。

# Build the model and its optimizer.
lstm = simpleLSTM(1, 100)
hidden_size = lstm.hidden_size

optimizer = optim.Adam(lstm.parameters())

# Zero initial states that track gradients; they are not referenced again
# in the training loop below.
h_0 = torch.zeros(hidden_size, requires_grad=True)
c_0 = torch.zeros(hidden_size, requires_grad=True)

grads = []
h_t, c_t = h_0, c_0

N_EPOCHS = 10

for epoch in range(N_EPOCHS):
    # Running sum of the per-batch losses for this epoch.
    epoch_loss = 0
    for i, batch in enumerate(train):
        optimizer.zero_grad()

        src, src_len = batch.src
        # Flatten the targets to one dimension for the loss.
        trg = batch.trg.view(-1)

        predict, output, hidden_states = lstm(src)
        # Transpose, then flatten to two dimensions keeping the last axis.
        predict = predict.t().unsqueeze(1)
        predict = predict.view(-1, predict.shape[-1])

        loss = criterion(predict, trg)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(epoch_loss)

【问题讨论】：

标签： python python-3.x deep-learning pytorch lstm

【解决方案1】：

最简单的方法是创建另一个模块（例如 Bidirectional）并将您想要的任何单元（cell）传递给它。

实现本身很容易做到。请注意，我使用concat 操作来连接双向输出，您可能需要指定其他模式，如求和等。

请阅读下面代码中的注释，您可能需要根据自己的情况做适当修改。

import torch


class Bidirectional(torch.nn.Module):
    """Run a recurrent cell over a sequence in both time directions.

    The wrapped cell is called as ``cell(x, init_states)`` and must return
    ``(prediction, hidden_seq, (h_t, c_t))``. It is run once on ``x`` and once
    on ``x`` reversed along the sequence axis; the two results are joined
    along their last (feature) axis.

    Args:
        cell: callable used for the forward direction.
        backward_cell: callable used for the reversed sequence. When ``None``
            (the default) ``cell`` is reused, so both directions share the
            same weights.
        input_seq_dim: sequence axis of the input ``x`` (default 0).
        output_seq_dim: sequence axis of the ``prediction`` and
            ``hidden_seq`` tensors returned by the cell (default 1, i.e. a
            ``(batch, seq, features)`` layout).
    """

    def __init__(self, cell, backward_cell=None, input_seq_dim=0, output_seq_dim=1):
        super().__init__()
        self.cell = cell
        self.backward_cell = backward_cell
        self.input_seq_dim = input_seq_dim
        self.output_seq_dim = output_seq_dim

    def forward(self, x, init_states=None):
        """Return the concatenated forward and backward results.

        Args:
            x: input sequence, with time along ``input_seq_dim``.
            init_states: passed unchanged to both directions.

        Returns:
            A flat 4-tuple ``(prediction, hidden_seq, h_t, c_t)`` in which
            every element is the forward result concatenated with the backward
            result along the last axis. The backward ``prediction`` and
            ``hidden_seq`` are flipped back along ``output_seq_dim`` first, so
            position ``t`` of the output describes the same input step in
            both halves.
        """
        backward_cell = self.cell if self.backward_cell is None else self.backward_cell

        prediction, hidden_seq, (h_t, c_t) = self.cell(x, init_states)

        # Feed the time-reversed sequence to the backward direction.
        reversed_x = torch.flip(x, (self.input_seq_dim,))
        backward_prediction, backward_hidden_seq, (
            backward_h_t,
            backward_c_t,
        ) = backward_cell(reversed_x, init_states)

        # The backward outputs come out in reversed time order; undo that so
        # they line up step by step with the forward outputs.
        out_dims = (self.output_seq_dim,)
        backward_prediction = torch.flip(backward_prediction, out_dims)
        backward_hidden_seq = torch.flip(backward_hidden_seq, out_dims)

        return (
            torch.cat((prediction, backward_prediction), dim=-1),
            torch.cat((hidden_seq, backward_hidden_seq), dim=-1),
            torch.cat((h_t, backward_h_t), dim=-1),
            torch.cat((c_t, backward_c_t), dim=-1),
        )

当涉及到多层时，原则上您可以做类似的事情：

import torch


class Multilayer(torch.nn.Module):
    """Stack recurrent cells so each one consumes the previous one's hidden sequence.

    Args:
        *cells: ``torch.nn.Module`` cells, first layer first. Each is called
            as ``cell(inputs, init_states)`` and must return
            ``(prediction, hidden_seq, (h_t, c_t))``.
    """

    def __init__(self, *cells):
        super().__init__()
        self.cells = torch.nn.ModuleList(cells)

    def forward(self, x, init_states=None):
        """Pass ``x`` through every cell in order.

        Args:
            x: input to the first cell.
            init_states: handed unchanged to every cell.

        Returns:
            ``(prediction, hidden_seq, (h_t, c_t))`` as produced by the last
            cell; the outputs of earlier cells are only used as inputs to the
            next one.

        Raises:
            ValueError: if the module was built without any cell.
        """
        if len(self.cells) == 0:
            raise ValueError("Multilayer needs at least one cell")

        inputs = x
        for cell in self.cells:
            prediction, hidden_seq, (h_t, c_t) = cell(inputs, init_states)
            # The hidden sequence of this layer is the input of the next one.
            inputs = hidden_seq
        return prediction, hidden_seq, (h_t, c_t)

请注意，您必须将已创建的单元（cell）对象传递给 Multilayer，例如：

# Stack three LSTM layers; each cell must be built with matching feature sizes.
multilayer_LSTM = Multilayer(*(LSTM() for _ in range(3)))

您也可以将类而不是实例传递给构造函数，并在 Multilayer 中创建它们（因此 hidden_size 会自动匹配），但这些想法应该可以帮助您入门。

【讨论】：