【发布时间】:2020-07-21 02:14:22
【问题描述】:
我正在为文档相似度设计一个孪生网络(Siamese network)。向量化使用的是 Google News 数据集。我使用了两个输入层,将它们分别嵌入后各自输入 LSTM,然后使用指数负曼哈顿距离函数 exp(-Σ|left − right|) 计算两者的相似度。下面附上我的模型代码:
# Hyperparameters for the siamese LSTM model built further below.
# Model variables
# Length of each integer-encoded input sequence (used as the Input shape and
# as the Embedding layer's input_length).
max_seq_length = 200
# Number of units passed to the shared LSTM layer.
n_hidden = 50
# Passed to the Adadelta optimizer as clipnorm.
gradient_clipping_norm = 1.25
batch_size = 64
n_epoch = 25
# Width of each embedding vector (second argument of the Embedding layer).
embedding_dim = 300
# NOTE(review): the trailing ", embedding_dim" makes this expression a 2-tuple
# (a single random float plus 1, and the int 300), not a 2-D array. The
# Embedding layer later receives it via weights=[embeddings] and calls
# len(embeddings) on it, which is consistent with the reported
# "'tuple' object has no attribute 'shape'" error. Presumably the intent was
# np.random.randn(<vocabulary size> + 1, embedding_dim) -- the vocabulary size
# is not visible in this snippet; TODO confirm.
embeddings = 1 * np.random.randn() + 1, embedding_dim
# Document-similarity helper: averages word vectors per document and scores
# pairs with exp(-Manhattan distance).
# NOTE(review): indentation was lost in this paste. The traceback in this post
# reports the embedding call as executing "in DocSim()", i.e. the
# model-building and training statements below run inside the class body.
class DocSim(object):
# NOTE(review): stopwords=[] is a mutable default shared between instances;
# in the code shown here it is only read, never mutated.
def __init__(self, w2v_model , stopwords=[]):
# Word -> vector lookup, indexed with w2v_model[word] in vectorize().
self.w2v_model = w2v_model
# Words dropped from a document before vectorizing.
self.stopwords = stopwords
def vectorize(self, doc):
"""Identify the vector values for each word in the given document"""
doc = doc.lower()
# Tokenize on single spaces only and drop stopwords.
words = [w for w in doc.split(" ") if w not in self.stopwords]
word_vecs = []
for word in words:
try:
vec = self.w2v_model[word]
word_vecs.append(vec)
except KeyError:
# Ignore, if the word doesn't exist in the vocabulary
pass
# Assuming that the document vector is the mean of all the word vectors
# NOTE(review): if no word was found, word_vecs is empty; np.mean of an empty
# list presumably yields nan with a warning -- confirm and handle if needed.
vector = np.mean(word_vecs, axis=0)
return vector
# Similarity exp(-sum(|left - right|)) reduced over axis 1, keeping that axis.
# K is presumably the Keras backend module (its import is not shown here).
def exponent_neg_manhattan_distance(self,left, right):
return K.exp(-K.sum(K.abs(left-right), axis=1, keepdims=True))
# The visible layer
left_input = Input(shape=(max_seq_length,), dtype='int32')
right_input = Input(shape=(max_seq_length,), dtype='int32')
# Frozen (trainable=False) embedding initialised from the module-level
# `embeddings`. NOTE(review): as written above, `embeddings` is a 2-tuple, so
# len(embeddings) is 2 and weights=[embeddings] holds an object with no .shape;
# the traceback in this post fails when these weights are set.
embedding_layer = Embedding(len(embeddings), embedding_dim, weights=[embeddings], input_length=max_seq_length, trainable=False)
# Embedded version of the inputs
encoded_left = embedding_layer(left_input)
encoded_right = embedding_layer(right_input)
# Since this is a siamese network, both sides share the same LSTM
shared_lstm = LSTM(n_hidden)
left_output = shared_lstm(encoded_left)
right_output = shared_lstm(encoded_right)
# Calculates the distance as defined by the MaLSTM model
# NOTE(review): the lambda calls the bare name exponent_neg_manhattan_distance
# with two arguments, while the definition above takes (self, left, right);
# whether the bare name resolves from inside the lambda is unclear -- confirm.
malstm_distance = Lambda(function=lambda x: exponent_neg_manhattan_distance(x[0], x[1]),output_shape=lambda x: (x[0][0], 1))([left_output, right_output])
# Pack it all up into a model
malstm = Model([left_input, right_input], [malstm_distance])
# Adadelta optimizer, with gradient clipping by norm
optimizer = Adadelta(clipnorm=gradient_clipping_norm)
malstm.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['accuracy'])
# Start training
# X_train, Y_train, X_validation and Y_validation are defined outside this
# snippet.
training_start_time = time()
# NOTE(review): `nb_epoch` is the older Keras spelling; newer releases appear
# to expect `epochs` -- confirm against the installed version.
malstm_trained = malstm.fit([X_train['left'], X_train['right']], Y_train, batch_size=batch_size, nb_epoch=n_epoch,
validation_data=([X_validation['left'], X_validation['right']], Y_validation))
print("Training time finished.\n{} epochs in {}".format(n_epoch, datetime.timedelta(seconds=time()-training_start_time)))
# Scores one source document against each target document and returns the
# matches above `threshold` as a list of dicts with the keys
# 'Siamese Sim Score' and 'Target Rule', highest score first.
# NOTE(review): withdigits_target_rules=[] is a mutable default; it is only
# iterated here, never mutated.
def calculate_similarity(self, withdigits_source_rules, withdigits_target_rules=[], threshold=0.8):
"""Calculates & returns similarity scores between given source rules & all
the target rules"""
# Accept a single target string as well as a list of strings.
if isinstance(withdigits_target_rules, str):
withdigits_target_rules = [withdigits_target_rules]
source_vec = self.vectorize(withdigits_source_rules)
results = []
for rule in withdigits_target_rules:
target_vec = self.vectorize(rule)
# NOTE(review): vectorize() returns a mean over axis 0 (presumably a 1-D
# vector), while the distance function reduces over axis=1 -- confirm that the
# shapes are compatible and that the result can be compared with a float.
sim_score = self.exponent_neg_manhattan_distance (source_vec, target_vec)
if sim_score > threshold:
results.append({
'Siamese Sim Score':sim_score,
'Target Rule':rule
})
# Sort results by score in desc order
results.sort(key=lambda k : k['Siamese Sim Score'] , reverse=True)
return results
我猜这个错误可能与嵌入层的输入形状有关。如果有人能指点一下,我将不胜感激。我得到的错误是:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-4-2df6269816cc> in <module>
14
15
---> 16 class DocSim(object):
17 def __init__(self, w2v_model , stopwords=[]):
18 self.w2v_model = w2v_model
<ipython-input-4-2df6269816cc> in DocSim()
48
49 # Embedded version of the inputs
---> 50 encoded_left = embedding_layer(left_input)
51 encoded_right = embedding_layer(right_input)
52
~\.conda\envs\gpuversion\lib\site-packages\keras\engine\base_layer.py in __call__(self, inputs, **kwargs)
466 # Load weights that were specified at layer instantiation.
467 if self._initial_weights is not None:
--> 468 self.set_weights(self._initial_weights)
469
470 # Raise exceptions in case the input is not compatible
~\.conda\envs\gpuversion\lib\site-packages\keras\engine\base_layer.py in set_weights(self, weights)
1120 param_values = K.batch_get_value(params)
1121 for pv, p, w in zip(param_values, params, weights):
-> 1122 if pv.shape != w.shape:
1123 raise ValueError('Layer weight shape ' +
1124 str(pv.shape) +
AttributeError: 'tuple' object has no attribute 'shape'
【问题讨论】:
标签: python tensorflow keras tuples