【问题标题】:slot-filling intent-detection joint model槽填充意图检测联合模型
【发布时间】:2020-06-03 22:07:41
【问题描述】:

大家好,我为聊天机器人开发了两个 RNN 模型。假设用户说:“告诉我明天巴黎的天气如何”。第一个模型能够识别用户的意图 WEATHER_INFO,而第二个模型能够从短语中提取有意义的信息,例如 LOC:Paris 和 DATE:tomorrow。当然还有许多其他意图类别,例如 MUSIC_PLAY 等等。由于这两个模型对同一个短语的预测没有以任何方式相互关联,我们可能得到例如 intent:WEATHER_INFO 和 TITLE:Paris 这样的结果,而槽 TITLE 本应对应 MUSIC_PLAY 意图。许多研究人员试图通过创建联合模型来提高性能,让两个模型相互利用对方的信息,以避免这种错误。这是我的槽填充神经网络(NN)代码:

from keras.models import Model, Input
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional
import keras as k
from keras_contrib.layers import CRF


# Network input: one sequence of max_len positions per sample.
input = Input(shape=(max_len,))

# The embedding width also serves as the base width of the recurrent layers.
word_embedding_size = 150
n_words = len(token_ids)

# Settings shared by both recurrent layers.
recurrent_options = dict(return_sequences=True, dropout=0.5, recurrent_dropout=0.5)

# Embedding lookup: n_words rows, word_embedding_size columns.
embedded = Embedding(
    input_dim=n_words,
    output_dim=word_embedding_size,
    input_length=max_len,
)(input)

# First recurrent layer: a bidirectional LSTM over the embedded sequence.
bilstm_out = Bidirectional(
    LSTM(
        units=word_embedding_size,
        kernel_initializer=k.initializers.he_normal(),
        **recurrent_options
    )
)(embedded)

# Second recurrent layer: a unidirectional LSTM at twice the embedding width.
lstm_out = LSTM(
    units=word_embedding_size * 2,
    kernel_initializer=k.initializers.he_normal(),
    **recurrent_options
)(bilstm_out)

# Per-timestep dense projection down to n_tags values.
tag_scores = TimeDistributed(Dense(n_tags, activation="relu"))(lstm_out)

# CRF output layer; the layer object is kept in `crf` because the compile
# step further down takes its loss function and accuracy metric from it.
crf = CRF(n_tags)
out = crf(tag_scores)  # output

model = Model(input, out)

# In[]

# model compile and fit

from keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt

# Adam optimiser with an explicit learning rate and moment decay rates.
adam = k.optimizers.Adam(lr=0.0005, beta_1=0.9, beta_2=0.999)

# The CRF layer supplies the loss and its own accuracy metric; the plain
# 'accuracy' metric is tracked alongside it.
model.compile(
    optimizer=adam,
    loss=crf.loss_function,
    metrics=[crf.accuracy, 'accuracy'],
)

model.summary()

# Write a checkpoint only when the monitored validation accuracy improves;
# the monitored value is embedded in the file name.
filepath = "ner-bi-lstm-td-model-{val_accuracy:.2f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode='max',
)
callbacks_list = [checkpoint]

# Train for 10 epochs, holding out 10% of the training data for validation.
history = model.fit(
    X_train,
    y_train,
    batch_size=256,
    epochs=10,
    validation_split=0.1,
    verbose=1,
    callbacks=callbacks_list,
)

这里是意图检测NN的代码

#CNN architecture    

from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras import layers

batch_size = 128

epochs = 12
if nn_architecture == 'CNN':
    # Intent classifier: frozen pre-trained embeddings -> 1D convolution ->
    # global max pooling -> dense hidden layer -> one probability per intent.
    model_CNN = Sequential()
    e = Embedding(vocab_size, 300, weights=[embedding_matrix], input_length=max_length, trainable=False)
    model_CNN.add(e)
    model_CNN.add(Dropout(0.2))
    # A Conv1D layer learns `filters` word-group filters, each spanning
    # `kernel_size` consecutive tokens.
    filters = 50
    kernel_size = 3
    hidden_dims = 250
    model_CNN.add(layers.Conv1D(filters,
                     kernel_size,
                     padding='valid',
                     activation='relu',
                     strides=1))
    # Keep the strongest response of each filter across the sequence.
    model_CNN.add(layers.GlobalMaxPooling1D())

    # Vanilla hidden layer.
    model_CNN.add(Dense(hidden_dims))
    model_CNN.add(Dropout(0.2))
    model_CNN.add(layers.Activation('relu'))

    # Project onto one unit per intent class and normalise with softmax so
    # the outputs form a single probability distribution over intents.
    # The earlier sigmoid + binary_crossentropy head scored every class as
    # an independent yes/no decision, which makes the 'accuracy' metric a
    # per-class binary accuracy and inflates it for a single-label task.
    # NOTE(review): assumes Y_train_c is one-hot (exactly one intent per
    # utterance) -- confirm; a genuinely multi-label target would need the
    # sigmoid/binary_crossentropy head instead.
    model_CNN.add(Dense(nbClasses))  # no_of_categories
    model_CNN.add(layers.Activation('softmax'))

    model_CNN.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    history_CNN = model_CNN.fit(X_train, Y_train_c,
              batch_size=batch_size,
              epochs=epochs,
              #validation_split=0.2
              )
    # Log from a run with the previous sigmoid/binary_crossentropy head
    # (not comparable with categorical accuracy):
    # Epoch 12/12
    # 38771/38771 [==============================] - 11s 276us/step -
    # loss: 0.0046 - accuracy: 0.9985

我想要的是合并这两种架构以获得这个

请帮助我...提前谢谢

【问题讨论】:

    标签: nlp recurrent-neural-network gated-recurrent-unit


    【解决方案1】:

    如今这已经算不上研究课题了,不过……下面这篇 github.io 上的文章正好符合您的要求——在一个模型中同时完成意图分类和槽填充任务。 https://chsasank.github.io/spoken-language-understanding.html

    由于只有链接的答案不受欢迎,这里也是模型架构 - 我稍微修改了一下,但总的来说这里是 Keras 代码:

    def build_model(self):
        """Build and compile the joint intent-classification / slot-filling model.

        A shared embedding + 1D-convolution front end feeds two recurrent
        branches: one emits a label distribution per token (``slot_output``),
        the other a single distribution per utterance (``intent_output``).
        The compiled model is stored on ``self.model``.

        Reads ``self.embedding_dimension``, ``self.dropout_parameter``,
        ``self.rnn_type``, ``self.rnn_units``, ``self.bidirectional``,
        ``self.maxPooling`` and ``self.averagePooling``; ``n_vocab``,
        ``n_slots`` and ``n_classes`` are taken from the enclosing scope.

        Returns:
            0, always.
        """
        sequence_length = 15  # fixed number of token positions per utterance

        # `shape` must be a tuple; a bare `(15)` is just the integer 15.
        main_input = Input(shape=(sequence_length,), dtype='int32', name='main_input')
        x = Embedding(output_dim=self.embedding_dimension, input_dim=n_vocab,
                      input_length=sequence_length)(main_input)
        x = Convolution1D(64, 5, padding='same', activation='relu')(x)

        if self.dropout_parameter > 0.0:
            x = Dropout(self.dropout_parameter)(x)

        # Compare strings with ==; `is` tests object identity and only
        # appears to work when the interpreter happens to intern the literal.
        if self.rnn_type == 'GRU':
            rnn = GRU(self.rnn_units, return_sequences=True)
        elif self.rnn_type == 'LSTM':
            rnn = LSTM(self.rnn_units, return_sequences=True)
        else:
            # The slot head labels every token, so the fallback RNN must
            # return the full sequence just like the GRU/LSTM branches.
            rnn = SimpleRNN(self.rnn_units, return_sequences=True)

        if self.bidirectional:
            rnn_slot = Bidirectional(rnn)(x)
        else:
            rnn_slot = rnn(x)

        # The intent head needs only the final state: one vector per utterance.
        rnn_intent = GRU(self.rnn_units, return_sequences=False)(x)

        # NOTE(review): both recurrent branches above have already consumed
        # `x`, so the pooled tensor below is never connected to an output.
        # Left in place to preserve the existing behaviour and log lines;
        # decide whether pooling should move ahead of the RNNs or be removed.
        if self.maxPooling:
            x = MaxPooling1D(strides=1, padding='same')(x)
            print("Using MaxPooling")
        elif self.averagePooling:
            x = AveragePooling1D(strides=1, padding='same')(x)
            print("Using AveragePooling")

        slot_output = Dense(n_slots, activation='softmax', name='slot_output')(rnn_slot)
        intent_output = Dense(n_classes, activation='softmax', name='intent_output')(rnn_intent)
        model = kerasModel(inputs=[main_input], outputs=[intent_output, slot_output])

        # rmsprop is recommended for RNNs https://stats.stackexchange.com/questions/315743/rmsprop-and-adam-vs-sgd
        model.compile(
            optimizer='rmsprop',
            loss={
                'intent_output': 'categorical_crossentropy',
                'slot_output': 'categorical_crossentropy',
            },
            metrics=['accuracy'],
        )
        model.summary()
        self.model = model

        return 0
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2012-03-02
      • 1970-01-01
      • 2017-03-14
      • 1970-01-01
      • 1970-01-01
      • 2020-03-20
      • 2012-04-02
      • 2012-04-23
      相关资源
      最近更新 更多