从 keras 迁移到 pytorch答案

【问题标题】：migrating from keras to pytorch（从 Keras 迁移到 PyTorch）
【发布时间】：2020-07-24 15:21:44
【问题描述】：

我是 PyTorch 新手。下面是两个 Keras 模型：一个是 LSTM 模型，另一个是双向 LSTM 模型。有没有人能告诉我，这两个模型在 PyTorch 中的等价实现是什么？我尝试写了一些 PyTorch 代码，但它不起作用。这些代码在 Keras 中能得到不错的准确率（acc），我想在 PyTorch 中得到完全相同的模型，但不幸的是我没能找到 :( 第一个：

def lstm_model(embedding_size, vocab_size):
    """Build a siamese two-layer LSTM similarity model in Keras.

    The ``title`` and ``body`` token sequences share one embedding layer and
    one stack of two LSTM layers. The model output is the dot product of the
    two sentence vectors with ``normalize=True`` (i.e. cosine similarity).

    Relies on ``layers``, ``models`` and ``w2v_weights`` being available in
    the enclosing module; none of them is defined in this function.

    Args:
        embedding_size: Output dimension of the embedding layer.
        vocab_size: Input dimension (number of token ids) of the embedding
            layer.

    Returns:
        A ``(sim_model, embedding_model)`` tuple. ``sim_model`` maps
        ``[title, body]`` to the similarity and is compiled with MSE loss and
        the ``nadam`` optimizer. ``embedding_model`` maps ``title`` to its
        sentence vector and is returned uncompiled.
    """
    title = layers.Input(shape=(None,), dtype='int32', name='title')
    body = layers.Input(shape=(None,), dtype='int32', name='body')

    # One embedding shared by both inputs; id 0 is treated as padding
    # (mask_zero=True) and the pretrained weights stay trainable.
    embedding = layers.Embedding(
        mask_zero=True,
        input_dim=vocab_size,
        output_dim=embedding_size,
        weights=[w2v_weights],
        trainable=True
    )

    # The same two LSTM layers encode both inputs (shared weights).
    lstm_1 = layers.LSTM(units=80, return_sequences=True)
    lstm_2 = layers.LSTM(units=80, return_sequences=False)

    emb_title = embedding(title)
    print("question embedding shape", emb_title.shape)
    sum_a = lstm_2(lstm_1(emb_title))
    print("q_output shape", sum_a.shape)

    emb_body = embedding(body)
    print("answer embedding shape", emb_body.shape)
    sum_b = lstm_2(lstm_1(emb_body))
    # Report the body encoding here (the original printed sum_a by mistake).
    print("a_output shape", sum_b.shape)

    sim = layers.dot([sum_a, sum_b], axes=1, normalize=True)
    print("qa_similarity shape", sim.shape)

    sim_model = models.Model(
        inputs=[title, body],
        outputs=[sim],
    )
    sim_model.compile(loss='mean_squared_error', optimizer='nadam', metrics=['accuracy'])

    embedding_model = models.Model(
        inputs=[title],
        outputs=[sum_a]
    )
    return sim_model, embedding_model

第二个：



def bilstm_model(embedding_size, vocab_size):
    """Build a siamese two-layer bidirectional LSTM similarity model in Keras.

    The ``title`` and ``body`` token sequences share one embedding layer and
    one stack of two bidirectional LSTM layers. The model output is the dot
    product of the two sentence vectors with ``normalize=True`` (i.e. cosine
    similarity).

    Relies on ``layers``, ``models`` and ``w2v_weights`` being available in
    the enclosing module; none of them is defined in this function.

    Args:
        embedding_size: Output dimension of the embedding layer.
        vocab_size: Input dimension (number of token ids) of the embedding
            layer.

    Returns:
        A ``(sim_model, embedding_model)`` tuple. ``sim_model`` maps
        ``[title, body]`` to the similarity and is compiled with MSE loss and
        the ``adam`` optimizer. ``embedding_model`` maps ``title`` to its
        sentence vector and is returned uncompiled.
    """
    title = layers.Input(shape=(None,), dtype='int32', name='title')
    body = layers.Input(shape=(None,), dtype='int32', name='body')

    # One embedding shared by both inputs; id 0 is treated as padding
    # (mask_zero=True) and the pretrained weights stay trainable.
    embedding = layers.Embedding(
        mask_zero=True,
        input_dim=vocab_size,
        output_dim=embedding_size,
        weights=[w2v_weights],
        trainable=True
    )

    # Use the `layers.` namespace for LSTM as well, consistent with every
    # other layer built here, so no separate `LSTM` import is required.
    lstm_1 = layers.Bidirectional(
        layers.LSTM(activation='tanh', dropout=0.2, units=100, return_sequences=True)
    )
    lstm_2 = layers.Bidirectional(
        layers.LSTM(activation='tanh', dropout=0.2, units=100, return_sequences=False)
    )

    # The same encoder stack is applied to both inputs (shared weights).
    sum_a = lstm_2(lstm_1(embedding(title)))
    sum_b = lstm_2(lstm_1(embedding(body)))

    sim = layers.dot([sum_a, sum_b], axes=1, normalize=True)
    sim_model = models.Model(
        inputs=[title, body],
        outputs=[sim],
    )
    sim_model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])

    embedding_model = models.Model(
        inputs=[title],
        outputs=[sum_a]
    )
    return sim_model, embedding_model

几周以来我一直在寻找一个真正的答案 :(

【问题讨论】：

标签： python keras deep-learning pytorch

【解决方案1】：

下面是把您的第一个模型 lstm_model 转换为 PyTorch 的一种可能写法。

通常，您在 PyTorch 中为您的网络创建一个类。

因此我将使用一个类来实现 LSTM

from torch import nn
import torch.nn.functional as F


class LSTMModel(nn.Module):
    """Embedding -> stacked LSTM -> linear decoder with log-softmax output.

    The LSTM is created without ``batch_first``, so inputs are laid out as
    ``(seq_len, batch)`` token ids.

    Args:
        vocab_size: Number of distinct token ids; sizes the embedding table
            and the decoder output.
        hidden_size: Number of features in the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability passed to ``nn.LSTM``.
        embedding_size: Dimension of each token embedding.
    """

    def __init__(self, vocab_size, hidden_size, num_layers,
                 dropout, embedding_size):
        super().__init__()
        # The table has one row per token id and `embedding_size` columns
        # (the original had these two arguments swapped).
        self.encoder = nn.Embedding(num_embeddings=vocab_size,
                                    embedding_dim=embedding_size)
        self.rnn = nn.LSTM(embedding_size,
                           hidden_size,
                           num_layers,
                           dropout=dropout)
        self.decoder = nn.Linear(in_features=hidden_size,
                                 out_features=vocab_size)
        self.init_weights()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Legacy default state shape, kept for callers of `init_hidden()`
        # that pass no batch size.
        self.weight_size = (num_layers, vocab_size, hidden_size)

    def init_weights(self):
        """Initialise encoder/decoder weights uniformly and zero the bias."""
        init_range = 0.1
        nn.init.uniform_(self.encoder.weight, -init_range,
                         init_range)
        # Zero the decoder *bias*; zeroing the weight would be overwritten
        # by the uniform initialisation on the next line.
        nn.init.zeros_(self.decoder.bias)
        nn.init.uniform_(self.decoder.weight, -init_range,
                         init_range)

    def forward(self, input_, hidden_=None):
        """Run the network on a batch of token ids.

        Args:
            input_: Integer tensor of token ids, ``(seq_len, batch)``.
            hidden_: Optional ``(h_0, c_0)`` tuple for the LSTM. When
                ``None`` the LSTM starts from zero states.

        Returns:
            ``(log_probs, hidden)`` where ``log_probs`` has shape
            ``(seq_len, batch, vocab_size)`` and ``hidden`` is the final
            ``(h_n, c_n)`` tuple from the LSTM.
        """
        embedded = self.encoder(input_)
        output, hidden_ = self.rnn(embedded, hidden_)
        # Decode the per-step outputs; the hidden state is a (h, c) tuple
        # and cannot be fed to a Linear layer.
        decoded = self.decoder(output)
        # Normalise over the vocabulary axis (last), not the batch axis.
        return F.log_softmax(input=decoded, dim=-1), hidden_

    def init_hidden(self, batch_size=None):
        """Return zero-filled ``(h_0, c_0)`` states on the model's device.

        Args:
            batch_size: Batch dimension of the states. When ``None`` the
                legacy shape stored in ``self.weight_size`` is used.
        """
        weight = next(self.parameters())
        if batch_size is None:
            shape = self.weight_size
        else:
            shape = (self.num_layers, batch_size, self.hidden_size)
        return (weight.new_zeros(shape),
                weight.new_zeros(shape))

现在，如果你直接使用上面的网络，你可能会遇到一些问题。在这种情况下，您需要修改这些值。

【讨论】：

非常感谢您的回答！请问这个实现是正确的吗？如果不是，请帮忙让这段代码与 Keras 代码完全等价 :)

【解决方案2】：

class Model(nn.Module):
    """Base module holding the embedding, the LSTM encoder and dropout.

    All configuration is read from keyword arguments:

    * ``vocab_size``, ``embedding_dim``, ``pad_idx`` -- embedding table.
    * ``model`` -- ``"lstm"`` or ``"BiLSTM"``; selects which encoder is
      built. For any other value no ``self.lstm`` attribute is created.
    * ``lstm_units`` / ``lstm_layers`` -- read when ``model == "lstm"``.
    * ``bilstm_units`` / ``bilstm_layers`` -- read when
      ``model == "BiLSTM"``.
    * ``dropout`` -- probability for ``self.dropout``.

    Raises:
        KeyError: If a required keyword argument is missing.
    """

    def __init__(self, **kwargs):
        super().__init__()

        self.embeddings = nn.Embedding(num_embeddings=kwargs["vocab_size"],
                                       embedding_dim=kwargs["embedding_dim"],
                                       padding_idx=kwargs["pad_idx"])
        # Keep the embedding weights trainable so they are updated during
        # training.
        self.embeddings.weight.requires_grad = True

        # Both encoders use batch_first=True, i.e. (batch, seq, feature)
        # inputs and outputs.
        if kwargs["model"] == "lstm":
            self.lstm = nn.LSTM(input_size=kwargs["embedding_dim"],  # input
                                hidden_size=kwargs["lstm_units"],  # output
                                num_layers=kwargs["lstm_layers"],
                                bidirectional=False,
                                batch_first=True)
        elif kwargs["model"] == "BiLSTM":
            self.lstm = nn.LSTM(input_size=kwargs["embedding_dim"],  # input
                                hidden_size=kwargs["bilstm_units"],  # output
                                num_layers=kwargs["bilstm_layers"],
                                bidirectional=True,
                                batch_first=True)

        # The original assigned self.dropout twice with the same value; a
        # single assignment is equivalent.
        self.dropout = nn.Dropout(kwargs["dropout"])
        self.tanh = F.tanh

    def forward(self):
        """Placeholder that does nothing and returns ``None``."""
        pass


class LSTM_Model(Model):
    """Siamese question/answer encoder scored by cosine similarity.

    Both inputs go through the shared embedding and the shared LSTM built by
    ``Model``. Each sequence is summarised by the final hidden state of the
    top LSTM layer, and the two summaries are compared with cosine
    similarity, which is what Keras ``dot(..., normalize=True)`` computes.

    NOTE(review): the LSTM also steps over padded positions. The Keras
    models use ``mask_zero=True``, so for exact parity on padded batches the
    sequences would need to be packed (``pack_padded_sequence``) -- confirm
    against the data pipeline.
    """

    def _sentence_vector(self, hidden):
        """Return the top-layer final state, both directions concatenated."""
        # `hidden` is (num_layers * num_directions, batch, hidden_size)
        # regardless of batch_first; the last entries belong to the top
        # layer (forward then backward when bidirectional).
        if self.lstm.bidirectional:
            return torch.cat((hidden[-2], hidden[-1]), dim=1)
        return hidden[-1]

    def forward(self, question, answer):
        """Encode both sequences and return their similarity.

        Args:
            question: Integer tensor of token ids, ``(batch, seq_len)``.
            answer: Integer tensor of token ids, ``(batch, seq_len)``.

        Returns:
            ``(qa_similary, qhidden)``: the per-sample cosine similarity of
            shape ``(batch,)`` and the question's final LSTM hidden state.
        """
        question_embedding = self.embeddings(question)
        answer_embedding = self.embeddings(answer)

        q_output, (qhidden, _) = self.lstm(question_embedding)
        print("q_output shape:", q_output.shape)
        a_output, (ahidden, _) = self.lstm(answer_embedding)
        print("a_output shape:", a_output.shape)

        # With batch_first=True, `q_output[-1]` would pick the last *sample*
        # of the batch, not the last time step; use the final hidden state.
        q_vector = self._sentence_vector(qhidden)
        a_vector = self._sentence_vector(ahidden)

        qa_similary = torch.nn.functional.cosine_similarity(
            q_vector, a_vector, dim=1)
        return qa_similary, qhidden

    # Runs once, when the class body is executed at definition time.
    print("**************************MODEL DEFINE & CREATED!****************************")

这是否是上面两层 LSTM 的 Keras 代码的正确且完全等价的实现？

【讨论】：