【发布时间】:2022-01-05 02:51:31
【问题描述】:
当我编写自定义缩放器来缩放我的数据而不缩放我已经创建的假人时,我收到关于自定义缩放器没有副本的错误?
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
class CustomScaler(BaseEstimator,TransformerMixin):
def __init__(self,columns,copy=True,with_mean=True,with_std=True):
self.scaler = StandardScaler(copy,with_mean,with_std)
self.columns = columns
self.mean_ = None
self.var_ = None
def fit(self, X, y=None):
self.scaler.fit(X[self.columns], y)
self.mean_ = np.mean(X[self.columns])
self.var_ = np.var(X[self.columns])
return self
def transform(self, X, y=None, copy=True):
init_col_order = X.columns
X_scaled = pd.DataFrame(self.scaler.transform(X[self.columns]),
columns=self.columns)
X_not_scaled = X.loc[:,~X.columns.isin(self.columns)]
return pd.concat([X_not_scaled, X_scaled], axis=1)[init_col_order]
-> 这些是我未缩放的列
unscaled_inputs.columns.values
array(['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4', 'Month',
'Day of the week', 'Transportation Expense', 'Distance to Work',
'Age', 'Daily Work Load Average', 'Body Mass Index', 'Education',
'Children', 'Pets'], dtype=object)
-> 我不想缩放的虚拟变量
columns_to_omit = ['Reason_1', 'Reason_2', 'Reason_3', 'Reason_4','Education']
-> 我想缩放的变量
columns_to_scale = [x for x in unscaled_inputs.columns.values if x not in columns_to_omit]
-> 将输入数据“columns_to_scale”提供给我的“CustomScaler”
absenteeism_scaler = CustomScaler(columns_to_scale)
-> 我收到这个警告
C:\Users\prati\Anaconda3\lib\site-packages\sklearn\utils\validation.py:70:
FutureWarning: Pass copy=True, with_mean=True, with_std=True as keyword args. From
version 1.0 (renaming of 0.25) passing these as positional arguments will result in an
error
warnings.warn(f"Pass {args_msg} as keyword args. From version "
-> 当我将“absenteeism_scaler”安装到“unscaled_inputs”时,出现错误,但该错误不会阻止我的代码进一步执行
absenteeism_scaler.fit(unscaled_inputs)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj, include,
exclude)
968
969 if method is not None:
--> 970 return method(include=include, exclude=exclude)
971 return None
972 else:
~\Anaconda3\lib\site-packages\sklearn\base.py in _repr_mimebundle_(self, **kwargs)
462 def _repr_mimebundle_(self, **kwargs):
463 """Mime bundle used by jupyter kernels to display estimator"""
--> 464 output = {"text/plain": repr(self)}
465 if get_config()["display"] == 'diagram':
466 output["text/html"] = estimator_html_repr(self)
~\Anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
258 n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
259
--> 260 repr_ = pp.pformat(self)
261
262 # Use bruteforce ellipsis when there are a lot of non-blank characters
~\Anaconda3\lib\pprint.py in pformat(self, object)
151 def pformat(self, object):
152 sio = _StringIO()
--> 153 self._format(object, sio, 0, 0, {}, 0)
154 return sio.getvalue()
155
~\Anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context,
level)
168 self._readable = False
169 return
--> 170 rep = self._repr(object, context, level)
171 max_width = self._width - indent - allowance
172 if len(rep) > max_width:
~\Anaconda3\lib\pprint.py in _repr(self, object, context, level)
402
403 def _repr(self, object, context, level):
--> 404 repr, readable, recursive = self.format(object, context.copy(),
405 self._depth, level)
406 if not readable:
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context,
maxlevels, level)
178
179 def format(self, object, context, maxlevels, level):
--> 180 return _safe_repr(object, context, maxlevels, level,
181 changed_only=self._changed_only)
182
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context,
maxlevels, level, changed_only)
423 recursive = False
424 if changed_only:
--> 425 params = _changed_params(object)
426 else:
427 params = object.get_params(deep=False)
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
89 estimator with non-default values."""
90
---> 91 params = estimator.get_params(deep=False)
92 init_func = getattr(estimator.__init__, 'deprecated_original',
93 estimator.__init__)
~\Anaconda3\lib\site-packages\sklearn\base.py in get_params(self, deep)
193 out = dict()
194 for key in self._get_param_names():
--> 195 value = getattr(self, key)
196 if deep and hasattr(value, 'get_params'):
197 deep_items = value.get_params().items()
AttributeError: 'CustomScaler' object has no attribute 'copy'
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
~\Anaconda3\lib\site-packages\IPython\lib\pretty.py in pretty(self, obj)
392 if cls is not object \
393 and callable(cls.__dict__.get('__repr__')):
--> 394 return _repr_pprint(obj, self, cycle)
395
396 return _default_pprint(obj, self, cycle)
~\Anaconda3\lib\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle)
698 """A pprint that just redirects to the normal repr function."""
699 # Find newlines and replace them with p.break_()
--> 700 output = repr(obj)
701 lines = output.splitlines()
702 with p.group():
~\Anaconda3\lib\site-packages\sklearn\base.py in __repr__(self, N_CHAR_MAX)
258 n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
259
--> 260 repr_ = pp.pformat(self)
261
262 # Use bruteforce ellipsis when there are a lot of non-blank characters
~\Anaconda3\lib\pprint.py in pformat(self, object)
151 def pformat(self, object):
152 sio = _StringIO()
--> 153 self._format(object, sio, 0, 0, {}, 0)
154 return sio.getvalue()
155
~\Anaconda3\lib\pprint.py in _format(self, object, stream, indent, allowance, context,
level)
168 self._readable = False
169 return
--> 170 rep = self._repr(object, context, level)
171 max_width = self._width - indent - allowance
172 if len(rep) > max_width:
~\Anaconda3\lib\pprint.py in _repr(self, object, context, level)
402
403 def _repr(self, object, context, level):
--> 404 repr, readable, recursive = self.format(object, context.copy(),
405 self._depth, level)
406 if not readable:
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in format(self, object, context,
maxlevels, level)
178
179 def format(self, object, context, maxlevels, level):
--> 180 return _safe_repr(object, context, maxlevels, level,
181 changed_only=self._changed_only)
182
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _safe_repr(object, context,
maxlevels, level, changed_only)
423 recursive = False
424 if changed_only:
--> 425 params = _changed_params(object)
426 else:
427 params = object.get_params(deep=False)
~\Anaconda3\lib\site-packages\sklearn\utils\_pprint.py in _changed_params(estimator)
89 estimator with non-default values."""
90
---> 91 params = estimator.get_params(deep=False)
92 init_func = getattr(estimator.__init__, 'deprecated_original',
93 estimator.__init__)
~\Anaconda3\lib\site-packages\sklearn\base.py in get_params(self, deep)
193 out = dict()
194 for key in self._get_param_names():
--> 195 value = getattr(self, key)
196 if deep and hasattr(value, 'get_params'):
197 deep_items = value.get_params().items()
AttributeError: 'CustomScaler' object has no attribute 'copy'
-> 它仍然给我输出,但我不明白,如果我得到一个错误代码应该停止执行?,但它会在之后运行, P.S - 这只是数据的左半部分,因为我无法将所有数据都放在这里,抱歉......
scaled_inputs = absenteeism_scaler.transform(unscaled_inputs)
scaled_inputs
Reason_1 Reason_2 Reason_3 Reason_4 Month Day of the week Transportation
Expense
0 0 0 0 1 0.182726 -0.683704 1.005844
1 0 0 0 0 0.182726 -0.683704 -1.574681
2 0 0 0 1 0.182726 -0.007725 -0.654143
3 1 0 0 0 0.182726 0.668253 0.854936
4 0 0 0 1 0.182726 0.668253 1.005844
更新 1:正如我删除 copy=True,with_mean=True,with_std=True from init,我的错误得到了解决,但我认为缩放后的数据将是“就地”变化以及均值和 S.D 的东西,所以如果我不想对我的数据进行就地更改我该怎么办???
更新2:是因为copy、with_mean和with_std默认为“True”吗?只需检查 sklearn 上的 StandardScaler 库
def _init__(self,columns):
self.scaler = StandardScaler()
self.columns = columns
self.mean = None
self.var_ = None
【问题讨论】:
-
虽然我感谢您继续调试并将这些细节添加到问题中,但它变得有点庞大。尝试将其编辑为一个问题(至少在将来)。
标签: python machine-learning scikit-learn jupyter-notebook python-3.8