【发布时间】:2021-09-22 06:58:56
【问题描述】:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df=pd.read_csv("car-sales-extended.csv")
df.head()
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn. impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
# Modelling
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
np.random.seed(42)
df.dropna(subset=["Price"], inplace=True)
categorical_features=["Make", "Colour"]
categorical_transformer=Pipeline(steps=[
("imputer",SimpleImputer(strategy="constant", fill_value="missing")),
("Onehot", OneHotEncoder(handle_unknown="ignore"))])
door_feature=["Doors"]
door_transformer=Pipeline(steps=[
("imputer",SimpleImputer(strategy="constant", fill_value=4)),
])
numeric_features=["Odometer (KM)"]
numeric_transformer = Pipeline(steps=[
("imputer", SimpleImputer(strategy="mean")),
("scaler", StandardScaler())])
# Setup preprocessing steps (fill the missing values, then convert to numbers)
preprocessor = ColumnTransformer(
transformers=[(
"cat", categorical_transformer, categorical_features),
("door", door_transformer, door_feature),
("num", numeric_features, numeric_transformer)])
#creating a preprocessing and modelling pipeline
model=Pipeline(steps=[("preprocessing", preprocessor),
("model", RandomForestClassifier())])
# Split data
x=df.drop("Price", axis=1)
y=df["Price"]
x_train, x_test, y_train, y_test=train_test_split(x,y,test_size=0.2)
# Fit and score the model
model.fit(x_train, y_train)
model.score(x_test, y_test)
我不知道为什么会出现这个类型错误??
TypeError:所有估计器都应该实现 fit 和 transform,或者可以是 'drop' 或 'passthrough' 说明符。 '['里程表 (KM)']' (type
【问题讨论】:
-
你交换了参数。
-
Rishabh 将来您应该尝试在问题的标题中更具体。
标签: python pandas numpy matplotlib scikit-learn