# Python machine learning: DecisionTreeClassifier

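# A decision tree works as a cascade of if/else decisions. It can be used for
# both classification and regression. Advantages: it places few requirements
# on the data itself and is intuitive and easy to understand. Drawbacks: it
# overfits easily and generalizes poorly. For regression, it cannot
# extrapolate beyond the range of the training data.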

from sklearn.tree import DecisionTreeClassifier

import matplotlib.pyplot as plt

import pandas as pd

import numpy as np

from sklearn.datasets import load_breast_cancer

from sklearn.model_selection import train_test_split

# Load the breast-cancer dataset bundled with scikit-learn.
cancer = load_breast_cancer()

# Hold out a test set. `stratify` must be the array of class labels
# (`cancer.target`), not the dataset container itself, so that the split
# keeps the class proportions the same in the training and test parts.
x_train, x_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    stratify=cancer.target,
    random_state=42,
)

# Baseline tree with no depth limit. `random_state` is fixed so repeated
# runs build the same tree (matching the seeded, depth-limited tree used
# for comparison).
tree = DecisionTreeClassifier(random_state=0)
tree.fit(x_train, y_train)

# Mean accuracy on the training split and on the held-out test split.
# These are bare expressions: the values are displayed only in an
# interactive session / notebook.
tree.score(x_train, y_train)
tree.score(x_test, y_test)

# The results show overfitting, so pre-prune the tree by capping its depth
# with max_depth. `fit` returns the estimator itself, so construction and
# training are chained into a single statement.
tree04 = DecisionTreeClassifier(random_state=0, max_depth=4).fit(x_train, y_train)

# Mean accuracy of the depth-limited tree on the training and test splits
# (bare expressions: shown only in an interactive session / notebook).
tree04.score(X=x_train, y=y_train)
tree04.score(X=x_test, y=y_test)

# Ensemble methods that address the weaknesses of decision trees