【问题标题】:migrating from keras to pytorch从 keras 迁移到 pytorch
【发布时间】:2020-07-24 15:21:44
【问题描述】:

我是 PyTorch 新手。下面是两个 Keras 模型:一个是两层 LSTM,另一个是两层双向 LSTM。有没有人能告诉我这两个模型在 PyTorch 中的等价实现是什么?我尝试写了一些 PyTorch 代码,但它不起作用。由于这段代码在 Keras 中的准确率不错,我想在 PyTorch 中得到完全相同的模型,可惜一直没能找到 :( 第一个:

def lstm_model(embedding_size, vocab_size):
    """Build a siamese two-layer LSTM similarity model in Keras.

    The ``title`` and ``body`` token sequences share one embedding layer and
    one stack of two LSTM layers (80 units each). The two encoded vectors are
    compared with ``layers.dot(..., normalize=True)``.

    Args:
        embedding_size: Output dimension of the embedding layer.
        vocab_size: Input dimension (number of rows) of the embedding layer.

    Returns:
        A ``(sim_model, embedding_model)`` tuple. ``sim_model`` maps
        ``[title, body]`` to the similarity score and is compiled with
        mean-squared-error loss and the Nadam optimizer.
        ``embedding_model`` maps ``title`` to its encoded vector and is
        returned uncompiled.
    """
    title = layers.Input(shape=(None,), dtype='int32', name='title')
    body = layers.Input(shape=(None,), dtype='int32', name='body')

    # NOTE: `w2v_weights` is read from the enclosing (module) scope; it is
    # not a parameter of this function.
    embedding = layers.Embedding(
        mask_zero=True,
        input_dim=vocab_size,
        output_dim=embedding_size,
        weights=[w2v_weights],
        trainable=True
    )

    # The same two LSTM layers encode both inputs (shared weights).
    lstm_1 = layers.LSTM(units=80, return_sequences=True)
    lstm_2 = layers.LSTM(units=80, return_sequences=False)

    emb_title = embedding(title)
    print("question embedding shape", emb_title.shape)
    sum_a = lstm_2(lstm_1(emb_title))
    print("q_output shape", sum_a.shape)

    emb_body = embedding(body)
    print("answer embedding shape", emb_body.shape)
    sum_b = lstm_2(lstm_1(emb_body))
    # Report the body encoding here; the original printed sum_a by mistake.
    print("a_output shape", sum_b.shape)

    sim = layers.dot([sum_a, sum_b], axes=1, normalize=True)
    print("qa_similarity shape", sim.shape)

    # The similarity is used as-is; no sigmoid activation is applied.
    sim_model = models.Model(
        inputs=[title, body],
        outputs=[sim],
    )
    sim_model.compile(loss='mean_squared_error', optimizer='nadam', metrics=['accuracy'])

    embedding_model = models.Model(
        inputs=[title],
        outputs=[sum_a]
    )
    return sim_model, embedding_model

第二个:



def bilstm_model(embedding_size, vocab_size):
    """Build a siamese two-layer bidirectional LSTM similarity model in Keras.

    The ``title`` and ``body`` token sequences share one embedding layer and
    one stack of two bidirectional LSTM layers (100 units per direction,
    ``dropout=0.2``). The two encoded vectors are compared with
    ``layers.dot(..., normalize=True)``.

    Args:
        embedding_size: Output dimension of the embedding layer.
        vocab_size: Input dimension (number of rows) of the embedding layer.

    Returns:
        A ``(sim_model, embedding_model)`` tuple. ``sim_model`` maps
        ``[title, body]`` to the similarity score and is compiled with
        mean-squared-error loss and the Adam optimizer.
        ``embedding_model`` maps ``title`` to its encoded vector and is
        returned uncompiled.
    """
    title = layers.Input(shape=(None,), dtype='int32', name='title')
    body = layers.Input(shape=(None,), dtype='int32', name='body')

    # NOTE: `w2v_weights` is read from the enclosing (module) scope; it is
    # not a parameter of this function.
    embedding = layers.Embedding(
        mask_zero=True,
        input_dim=vocab_size,
        output_dim=embedding_size,
        weights=[w2v_weights],
        trainable=True
    )

    # Use the `layers.` namespace for LSTM, consistent with every other
    # layer in this file, instead of relying on a separate bare `LSTM` import.
    lstm_1 = layers.Bidirectional(
        layers.LSTM(activation='tanh', dropout=0.2, units=100, return_sequences=True)
    )
    lstm_2 = layers.Bidirectional(
        layers.LSTM(activation='tanh', dropout=0.2, units=100, return_sequences=False)
    )

    # The same two layers encode both inputs (shared weights).
    sum_a = lstm_2(lstm_1(embedding(title)))
    sum_b = lstm_2(lstm_1(embedding(body)))

    # The similarity is used as-is; no sigmoid activation is applied.
    sim = layers.dot([sum_a, sum_b], axes=1, normalize=True)
    sim_model = models.Model(
        inputs=[title, body],
        outputs=[sim],
    )
    sim_model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])

    embedding_model = models.Model(
        inputs=[title],
        outputs=[sum_a]
    )
    return sim_model, embedding_model

几周以来我一直在寻找一个真正的答案 :(

【问题讨论】:

    标签: python keras deep-learning pytorch


    【解决方案1】:

    这是您的第一个 lstm_model 到 PyTorch 的可能转换

    通常,您在 PyTorch 中为您的网络创建一个类。

    因此我将使用一个类来实现 LSTM

    from torch import nn
    import torch.nn.functional as F
    
    
    class LSTMModel(nn.Module):
        """Embedding -> multi-layer LSTM -> linear decoder with log-softmax.

        The LSTM is created without ``batch_first``, so token inputs are
        sequence-first: ``(seq_len, batch)``.
        """

        def __init__(self, vocab_size, hidden_size, num_layers,
                     dropout, embedding_size):
            """Create the layers.

            Args:
                vocab_size: Number of distinct token ids (embedding rows and
                    decoder output features).
                hidden_size: Hidden state size of each LSTM layer.
                num_layers: Number of stacked LSTM layers.
                dropout: Dropout probability passed to ``nn.LSTM`` (applied
                    between stacked layers).
                embedding_size: Dimension of each token embedding.
            """
            super().__init__()
            # One row per token id, each row an embedding_size-dim vector.
            # (The original had these two arguments swapped.)
            self.encoder = nn.Embedding(num_embeddings=vocab_size,
                                        embedding_dim=embedding_size)
            # The LSTM consumes embeddings, so its input size is the
            # embedding dimension.
            self.rnn = nn.LSTM(embedding_size,
                               hidden_size,
                               num_layers,
                               dropout=dropout)
            # Project every LSTM output step to one score per token id.
            self.decoder = nn.Linear(in_features=hidden_size,
                                     out_features=vocab_size)
            self.hidden_size = hidden_size
            self.num_layers = num_layers
            self.init_weights()

        def init_weights(self):
            """Initialize embedding/decoder weights uniformly, decoder bias to zero."""
            init_range = 0.1
            nn.init.uniform_(self.encoder.weight, -init_range,
                             init_range)
            # Zero the bias; zeroing the weight would be overwritten by the
            # uniform initialization on the next line.
            nn.init.zeros_(self.decoder.bias)
            nn.init.uniform_(self.decoder.weight, -init_range,
                             init_range)

        def forward(self, input_, hidden_=None):
            """Run the network on a batch of token ids.

            Args:
                input_: Integer tensor of token ids, ``(seq_len, batch)``.
                hidden_: Optional ``(h_0, c_0)`` tuple as produced by
                    ``init_hidden``; ``None`` lets ``nn.LSTM`` start from zeros.

            Returns:
                ``(log_probs, hidden)`` where ``log_probs`` has shape
                ``(seq_len, batch, vocab_size)`` and ``hidden`` is the
                ``(h_n, c_n)`` tuple returned by the LSTM.
            """
            embedded = self.encoder(input_)
            output, hidden_ = self.rnn(embedded, hidden_)
            # Decode the per-step outputs; `hidden_` is an (h, c) tuple and
            # cannot be passed to nn.Linear.
            decoded = self.decoder(output)
            # Normalize over the token axis (last), not the batch axis.
            return F.log_softmax(decoded, dim=-1), hidden_

        def init_hidden(self, batch_size=1):
            """Return zero ``(h_0, c_0)`` states for a batch of ``batch_size``.

            The tensors are created from an existing parameter so that they
            share its dtype and device.
            """
            weight = next(self.parameters())
            state_shape = (self.num_layers, batch_size, self.hidden_size)
            return (weight.new_zeros(state_shape),
                    weight.new_zeros(state_shape))
    
    

    现在,如果你直接使用上面的网络,你可能会遇到一些问题。在这种情况下,您需要修改这些值。

    【讨论】:

    • 非常感谢您的回答。请问这个实现是正确的吗?如果不是,请帮忙让这段代码与 Keras 代码完全等价 :)
    【解决方案2】:
    class Model(nn.Module):
        """Base module holding an embedding layer and an LSTM encoder.

        Keyword arguments read by ``__init__``:
            vocab_size, embedding_dim, pad_idx: passed to ``nn.Embedding``.
            model: ``"lstm"`` or ``"BiLSTM"``; selects the encoder.
            lstm_units, lstm_layers: used when ``model == "lstm"``.
            bilstm_units, bilstm_layers: used when ``model == "BiLSTM"``.
            dropout: probability for ``nn.Dropout``.

        Subclasses are expected to implement ``forward``.
        """

        def __init__(self, **kwargs):
            super().__init__()

            self.embeddings = nn.Embedding(num_embeddings=kwargs["vocab_size"],
                                          embedding_dim=kwargs["embedding_dim"],
                                          padding_idx=kwargs["pad_idx"])
            # Keep the embedding weights trainable so they are fine-tuned.
            self.embeddings.weight.requires_grad = True

            model_kind = kwargs["model"]
            if model_kind == "lstm":
                self.lstm = nn.LSTM(input_size=kwargs["embedding_dim"],
                                    hidden_size=kwargs["lstm_units"],
                                    num_layers=kwargs["lstm_layers"],
                                    bidirectional=False,
                                    batch_first=True)
            elif model_kind == "BiLSTM":
                self.lstm = nn.LSTM(input_size=kwargs["embedding_dim"],
                                    hidden_size=kwargs["bilstm_units"],
                                    num_layers=kwargs["bilstm_layers"],
                                    bidirectional=True,
                                    batch_first=True)
            else:
                # Fail here rather than later with a missing `self.lstm`.
                raise ValueError(
                    f"unknown model {model_kind!r}; expected 'lstm' or 'BiLSTM'"
                )

            # Single dropout module (the original assigned it twice).
            self.dropout = nn.Dropout(kwargs["dropout"])
            self.tanh = F.tanh

        def forward(self):
            """Placeholder; concrete subclasses define the forward pass."""
            raise NotImplementedError("subclasses of Model must implement forward()")
    
    
    class LSTM_Model(Model):
        """Siamese encoder scoring a question/answer pair by cosine similarity.

        Both inputs go through the shared ``self.embeddings`` and
        ``self.lstm`` provided by the base class. ``self.lstm`` is created
        with ``batch_first=True``, so inputs are ``(batch, seq_len)``.
        """
        def __init__(self, **kwargs):
            super().__init__(**kwargs)

        def _last_state(self, hidden):
            """Return the top-layer final hidden state as ``(batch, features)``.

            ``hidden`` is the ``h_n`` tensor from ``nn.LSTM``. For a
            bidirectional LSTM its last two entries are the top layer's
            forward and backward states, which are concatenated.
            """
            if self.lstm.bidirectional:
                return torch.cat((hidden[-2], hidden[-1]), dim=1)
            return hidden[-1]

        def forward(self, question, answer):
            """Encode both sequences and return their similarity.

            Args:
                question: Integer tensor of token ids, ``(batch, seq_len)``.
                answer: Integer tensor of token ids, ``(batch, seq_len)``.

            Returns:
                ``(qa_similary, qhidden)``: a ``(batch,)`` tensor of cosine
                similarities, and the LSTM's final hidden-state tensor for
                the question.
            """
            question_embedding = self.embeddings(question)
            answer_embedding = self.embeddings(answer)

            q_output, (qhidden, _) = self.lstm(question_embedding)
            print("q_output shape:", q_output.shape)
            a_output, (ahidden, _) = self.lstm(answer_embedding)
            print("a_output shape:", a_output.shape)

            # Use the final hidden state as the sequence encoding. Indexing
            # `q_output[-1]` on a batch-first tensor would select the last
            # batch element, and `.squeeze()` would drop the batch axis when
            # the batch size is 1.
            # NOTE(review): padded positions are still fed through the LSTM
            # here; packing the sequences would be needed to skip them —
            # confirm whether that matters for the caller.
            q_vec = self._last_state(qhidden)
            a_vec = self._last_state(ahidden)

            # Normalized dot product of the two encodings, one score per pair.
            qa_similary = torch.nn.functional.cosine_similarity(q_vec, a_vec, dim=1)
            return qa_similary, qhidden

        print("**************************MODEL DEFINE & CREATED!****************************")
    

    这是上面两层 LSTM 的 Keras 代码的正确且完全等价的实现吗?

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2021-10-29
      • 2020-12-25
      • 2020-06-06
      • 2020-05-13
      • 2021-10-27
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多