【发布时间】:2021-03-11 07:03:23
【问题描述】:
这里的目标是为浮点值和分类值设置插补层,我不想更改/预处理用作插补的数组类型。问题是,当从包含 np.nan 的 pandas df 读取数组时,我有一个数组,它既有字符串又有 nan,TF 解释为 float 并且在尝试创建张量时具有不匹配的 dtype。所以我想我可能需要 Lambda 的某种转换层。我不确定。
环境和数据
import numpy as np
from scipy.stats import mode
import tensorflow as tf
from tensorflow import keras
import pandas as pd
train = pd.read_csv('https://gist.githubusercontent.com/michhar/2dfd2de0d4f8727f873422c5d959fff5/raw/fa71405126017e6a37bea592440b4bee94bf7b9e/titanic.csv')
有问题的数组:
cabin = train['Cabin'].values
array([nan, 'C85', nan, 'C123', nan, nan, 'E46', nan, nan, nan, 'G6', 'C103',南,南,南,南,南,南,南,南,南,'D56',南, 'A6',南,南,南,'C23 C25 C27',南,南,南,'B78',南,南, 南,南,南,南,南,南,南,南,南,南,南,南,南, 南,南,南,南,南,'D33',南,'B30','C52',南,南,南, 南,南,'B28','C83',南,南,南,'F33',南,南,南,南, 南,南,南,南,'F G73',南,南,南,南,南,南,南, 南,南,南,南,南,'C23 C25 C27' ..]
插补层
class Imputation(keras.layers.Layer):
def adapt(self, data_sample):
if data_sample.dtype in ('float32', 'float64'):
self.means_ = np.nanmean(data_sample, axis=0, keepdims=0)
print(self.means_)
if data_sample.dtype == 'object':
self.mode_ = mode(data_sample, axis=0)[0]
print(self.mode_)
def call(self, input):
if input.dtype in ('float32', 'float64'):
input = tf.where(tf.math.is_nan(input),
tf.constant(self.means_, dtype=tf.float32),
input)
if input.dtype == tf.string:
# HERE i need help
input = tf.where(input == 'NaN',
tf.constant(self.mode_, dtype=tf.string),
input)
return input
图层适用的示例,例如整数
imputed = Imputation()
imputed.adapt(train['Age'].values)
imputed(train['Age'].values)
图层与预处理的分类数组一起使用的示例(我不想这样做)
imputed = Imputation()
imputed.adapt(train['Cabin'].values)
cabin2 = train['Cabin'].replace(np.nan, 'NaN').values
imputed(cabin2)
array([b'C23 C25 C27', b'C85', b'C23 C25 C27', b'C123', b'C23 C25 C27', b'C23 C25 C27',b'E46',b'C23 C25 C27',b'C23 C25 C27', b'C23 C25 C27'、b'G6'、b'C103'、b'C23 C25 C27'、b'C23 C25 C27'、 b'C23 C25 C27'、b'C23 C25 C27'、b'C23 C25 C27'、b'C23 C25 C27'、 b'C23 C25 C27'、b'C23 C25 C27'、b'C23 C25 C27'、b'D56'、 b'C23 C25 C27'、b'A6'、b'C23 C25 C27'、b'C23 C25 C27'、 b'C23 C25 C27'、b'C23 C25 C27'、b'C23 C25 C27'、b'C23 C25 C27'、 b'C23 C25 C27'、b'B78'、b'C23 C25 C27'、b'C23 C25 C27'、 b'C23 C25 C27', b'C23 C25 C27', b'C23 C25 C27', b'C23 C25 C27', ...])
我的问题案例:
imputed = Imputation()
imputed.adapt(train['Cabin'].values)
cabin = train['Cabin'].values
imputed(cabin)
错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-46-ce3b039382a7> in <module>()
2 imputed.adapt(train['Cabin'].values)
3 cabin = train['Cabin'].values
----> 4 imputed(cabin)
10 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
931 # Accept NumPy and scalar inputs by converting to Tensors.
932 if any(isinstance(x, (np.ndarray, float, int)) for x in input_list):
--> 933 inputs = nest.map_structure(_convert_numpy_or_python_types, inputs)
934 input_list = nest.flatten(inputs)
935
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/nest.py in map_structure(func, *structure, **kwargs)
633
634 return pack_sequence_as(
--> 635 structure[0], [func(*x) for x in entries],
636 expand_composites=expand_composites)
637
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/nest.py in <listcomp>(.0)
633
634 return pack_sequence_as(
--> 635 structure[0], [func(*x) for x in entries],
636 expand_composites=expand_composites)
637
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in _convert_numpy_or_python_types(x)
3237 def _convert_numpy_or_python_types(x):
3238 if isinstance(x, (np.ndarray, float, int)):
-> 3239 return ops.convert_to_tensor_v2(x)
3240 return x
3241
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in convert_to_tensor_v2(value, dtype, dtype_hint, name)
1380 name=name,
1381 preferred_dtype=dtype_hint,
-> 1382 as_ref=False)
1383
1384
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, dtype_hint, ctx, accepted_result_types)
1497
1498 if ret is None:
-> 1499 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
1500
1501 if ret is NotImplemented:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_conversion_registry.py in _default_conversion_function(***failed resolving arguments***)
50 def _default_conversion_function(value, dtype, name, as_ref):
51 del as_ref # Unused.
---> 52 return constant_op.constant(value, dtype, name=name)
53
54
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py in constant(value, dtype, shape, name)
262 """
263 return _constant_impl(value, dtype, shape, name, verify_shape=False,
--> 264 allow_broadcast=True)
265
266
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py in _constant_impl(value, dtype, shape, name, verify_shape, allow_broadcast)
273 with trace.Trace("tf.constant"):
274 return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
--> 275 return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
276
277 g = ops.get_default_graph()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py in _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
298 def _constant_eager_impl(ctx, value, dtype, shape, verify_shape):
299 """Implementation of eager constant."""
--> 300 t = convert_to_eager_tensor(value, ctx, dtype)
301 if shape is None:
302 return t
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py in convert_to_eager_tensor(value, ctx, dtype)
96 dtype = dtypes.as_dtype(dtype).as_datatype_enum
97 ctx.ensure_initialized()
---> 98 return ops.EagerTensor(value, ctx.device_name, dtype)
99
100
ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).
【问题讨论】:
标签: python python-3.x numpy tensorflow tensorflow2.0