【发布时间】:2021-10-20 10:35:21
【问题描述】:
我正在尝试使用来自 huggingface 转换器库的 BERT 解决多标签文本分类问题。模型定义如下:
def create_model(encoder, nb_classes=3, lr=1e-5):
# inputs
input_ids = tf.keras.Input(shape=(512,), ragged=False,
dtype=tf.int32, name='input_ids')
input_attention_mask = tf.keras.Input(shape=(512,), ragged=False,
dtype=tf.int32, name='attention_mask')
# transformer
output = encoder({'input_ids': input_ids,
'attention_mask': input_attention_mask})[0]
Y = tf.keras.layers.BatchNormalization()(output)
Y = tf.keras.layers.Dense(nb_classes, activation='sigmoid')(Y)
# compilation
model = tf.keras.Model(inputs=[input_ids, input_attention_mask],
outputs=[Y])
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
# losses
# loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
# loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)
model.compile(optimizer=optimizer,
loss=multilabel_loss, metrics=['acc'])
model.summary()
return model
如您所见,我尝试使用 tf.keras.losses,但它不起作用(抛出AttributeError: 'Tensor' object has no attribute 'nested_row_splits'),所以我手动定义了一个简单的交叉熵:
def multilabel_loss(y_true, y_pred):
y_pred = tf.convert_to_tensor(y_pred)
y_true = tf.cast(y_true, y_pred.dtype)
cross_entropy = -tf.reduce_sum((y_true*tf.math.log(y_pred + 1e-8) + (1 - y_true) * tf.math.log(1 - y_pred + 1e-8)),
name='xentropy')
return cross_entropy
模型是使用 strategy.scope() 创建的,如下所示,使用 'distil-bert-uncased' 作为检查点:
with strategy.scope():
encoder = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
#encoder = TFRobertaForSequenceClassification.from_pretrained(checkpoint)
model = create_model(encoder)
标签是二进制数组:
163350 [0, 0, 1]
118940 [0, 0, 1]
65243 [0, 0, 1]
30011 [0, 0, 1]
189713 [0, 1, 0]
它们在下一个函数中与标记化文本组合成一个 tf.dataset:
def tf_text_data_prep(df):
"""
input: takes pandas dataframe
output: returns tokenized tf.Dataset
"""
hugging_ds = Dataset.from_pandas(df)
tokenized_ds = hugging_ds.map(
tokenize_function,
batched=True,
num_proc=strategy.num_replicas_in_sync,
remove_columns=["Text", '__index_level_0__'],
load_from_cache_file=True
)
# Convert to tensorflow
tf_dataset = tokenized_ds.with_format("tensorflow")
features = {x: tf_dataset[x].to_tensor() for x in tokenizer.model_input_names}
tf_data = tf.data.Dataset.from_tensor_slices((features, tf_dataset["label"]))
return tf_data
问题是当我启动训练时,我得到了错误:
TypeError Traceback (most recent call last)
<ipython-input-62-720b4634d50e> in <module>()
----> 1 get_ipython().run_cell_magic('time', '', 'steps_per_epoch = int(BUFFER_SIZE // BATCH_SIZE)\nprint(\n f"Model Params:\\nbatch_size: {BATCH_SIZE}\\nEpochs: {EPOCHS}\\n"\n f"Step p. Epoch: {steps_per_epoch}\\n"\n f"Initial Learning rate: {INITAL_LEARNING_RATE}"\n)\nhistory = model.fit(\n train_ds,\n validation_data=val_ds,\n batch_size=BATCH_SIZE,\n epochs=EPOCHS,\n callbacks=callbacks,\n verbose=1,\n)')
12 frames
/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2115 magic_arg_s = self.var_expand(line, stack_depth)
2116 with self.builtin_trap:
-> 2117 result = fn(magic_arg_s, cell)
2118 return result
2119
<decorator-gen-53> in time(self, line, cell, local_ns)
/usr/local/lib/python3.7/dist-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
186 # but it's overkill for just that one bit of state.
187 def magic_deco(arg):
--> 188 call = lambda f, *a, **k: f(*a, **k)
189
190 if callable(arg):
/usr/local/lib/python3.7/dist-packages/IPython/core/magics/execution.py in time(self, line, cell, local_ns)
1191 else:
1192 st = clock2()
-> 1193 exec(code, glob, local_ns)
1194 end = clock2()
1195 out = None
<timed exec> in <module>()
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1176 _r=1):
1177 callbacks.on_train_batch_begin(step)
-> 1178 tmp_logs = self.train_function(iterator)
1179 if data_handler.should_sync:
1180 context.async_wait()
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
887
888 with OptionalXlaContext(self._jit_compile):
--> 889 result = self._call(*args, **kwds)
890
891 new_tracing_count = self.experimental_get_tracing_count()
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
931 # This is the first call of __call__, so we have to initialize.
932 initializers = []
--> 933 self._initialize(args, kwds, add_initializers_to=initializers)
934 finally:
935 # At this point we know that the initialization is complete (or less
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
762 self._concrete_stateful_fn = (
763 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 764 *args, **kwds))
765
766 def invalid_creator_scope(*unused_args, **unused_kwds):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
3048 args, kwargs = None, None
3049 with self._lock:
-> 3050 graph_function, _ = self._maybe_define_function(args, kwargs)
3051 return graph_function
3052
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
3442
3443 self._function_cache.missed.add(call_context_key)
-> 3444 graph_function = self._create_graph_function(args, kwargs)
3445 self._function_cache.primary[cache_key] = graph_function
3446
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3287 arg_names=arg_names,
3288 override_flat_arg_shapes=override_flat_arg_shapes,
-> 3289 capture_by_value=self._capture_by_value),
3290 self._function_attributes,
3291 function_spec=self.function_spec,
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
997 _, original_func = tf_decorator.unwrap(python_func)
998
--> 999 func_outputs = python_func(*func_args, **func_kwargs)
1000
1001 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/def_function.py in wrapped_fn(*args, **kwds)
670 # the function a weak reference to itself to avoid a reference cycle.
671 with OptionalXlaContext(compile_with_xla):
--> 672 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
673 return out
674
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
984 except Exception as e: # pylint:disable=broad-except
985 if hasattr(e, "ag_error_metadata"):
--> 986 raise e.ag_error_metadata.to_exception(e)
987 else:
988 raise
TypeError: in user code:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:850 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:840 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1285 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2833 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3608 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:833 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:795 train_step
self.compiled_metrics.update_state(y, y_pred, sample_weight)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/compile_utils.py:460 update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/utils/metrics_utils.py:86 decorated
update_op = update_state_fn(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/metrics.py:177 update_state_fn
return ag_update_state(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/metrics.py:659 update_state **
[y_true, y_pred], sample_weight)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/utils/metrics_utils.py:546 ragged_assert_compatible_and_get_flat_values
raise TypeError('One of the inputs does not have acceptable types.')
TypeError: One of the inputs does not have acceptable types.
同样的方法适用于普通的二元分类,但不适用于多标签。 对于错误或一般方法的任何帮助,我将不胜感激。
【问题讨论】:
标签: python tensorflow machine-learning keras huggingface-transformers