【发布时间】:2020-10-20 10:26:38
【问题描述】:
我正在尝试使用 scikit-learn 的 cross_validate 做一个简单的交叉验证,但得到了以下 TypeError:
TypeError Traceback (most recent call last)
<ipython-input-59-0471fb78d8f0> in <module>
5
6 model = NMF(n_components=185, init='random', random_state=0)
----> 7 scores = cross_validate(model, df4_array, cv=5, scoring=('neg_mean_squared_error'))
8 W = model.fit_transform(df4_array)
9 H = model.components_
~\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
234 return_times=True, return_estimator=return_estimator,
235 error_score=error_score)
--> 236 for train, test in cv.split(X, y, groups))
237
238 zipped_scores = list(zip(*scores))
C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
919 # remaining jobs.
920 self._iterating = False
--> 921 if self.dispatch_one_batch(iterator):
922 self._iterating = self._original_iterator is not None
923
C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
C:\ProgramData\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
C:\ProgramData\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
C:\ProgramData\Anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
~\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
542 else:
543 fit_time = time.time() - start_time
--> 544 test_scores = _score(estimator, X_test, y_test, scorer)
545 score_time = time.time() - start_time - fit_time
546 if return_train_score:
~\AppData\Roaming\Python\Python37\site-packages\sklearn\model_selection\_validation.py in _score(estimator, X_test, y_test, scorer)
587 scorer = _MultimetricScorer(**scorer)
588 if y_test is None:
--> 589 scores = scorer(estimator, X_test)
590 else:
591 scores = scorer(estimator, X_test, y_test)
~\AppData\Roaming\Python\Python37\site-packages\sklearn\metrics\_scorer.py in __call__(self, estimator, *args, **kwargs)
85 if isinstance(scorer, _BaseScorer):
86 score = scorer._score(cached_call, estimator,
---> 87 *args, **kwargs)
88 else:
89 score = scorer(estimator, *args, **kwargs)
TypeError: _score() missing 1 required positional argument: 'y_true'
我不明白为什么会出现这个错误:我正在尝试使用非负矩阵分解(NMF)为 tue 大学做一个推荐系统,而这是一种无监督的方法......代码难道不应该在没有 y 的情况下也能运行吗?
代码:
from sklearn.decomposition import NMF
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
model = NMF(n_components=185, init='random', random_state=0)
scores = cross_validate(model, df4_array, cv=5, scoring=('neg_mean_squared_error'))
W = model.fit_transform(df4_array)
H = model.components_
【问题讨论】:
-
请提供完整的回溯。
-
抱歉问一下,您是指完整的代码吗?
-
不,我认为错误在提供的代码中。我的意思是完整的错误回溯。
-
要了解交叉验证错误,我认为您会发现此链接很有用:stats.stackexchange.com/questions/354611/…
标签: python scikit-learn