# sklearn 实现
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import numpy as np
import pandas as pd
# Load the iris dataset and combine the feature matrix with the label
# vector into one DataFrame whose last column is named 'class'.
data_sets = load_iris()
x, y = data_sets.data, data_sets.target

# Stack the 1-D label array as an extra column to the right of the features.
data = np.column_stack((x, y))
cols = [*data_sets.feature_names, 'class']
df = pd.DataFrame(data=data, columns=cols)

print(df.shape)
# Bare expression: only displays a preview when run as the last statement
# of a notebook cell.
df.head()
(150, 5)
|   | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | class |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | 0.0 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | 0.0 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | 0.0 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | 0.0 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | 0.0 |
# Separate the feature columns from the 'class' label column.
X = df.drop(columns='class')
y = df['class']

# Hold out 30% of the rows for evaluation; the split itself is seeded.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    shuffle=True,
    random_state=1,
)

# Hyper-parameters for the decision tree, spelled out explicitly.
# NOTE(review): no random_state is passed to the tree; scikit-learn's docs
# suggest tie-breaking between equally good splits may then vary between
# runs -- confirm whether exact reproducibility is required here.
tree_params = {
    "criterion": "gini",
    "splitter": "best",
    "max_depth": None,
    "min_samples_split": 2,
    "min_samples_leaf": 1,
    "min_weight_fraction_leaf": 0,
    "max_features": None,
    "max_leaf_nodes": None,
    "min_impurity_decrease": 0,
    "class_weight": None,
}
dtree = DecisionTreeClassifier(**tree_params)
dtree.fit(x_train, y_train)

# Pair each feature name with its importance and print the mapping.
features_im = dtree.feature_importances_.tolist()
print('特征重要性:', {name: weight for name, weight in zip(X.columns, features_im)})

# Predict on the held-out rows and print the classifier's score on them.
y_pred = dtree.predict(x_test)
score = dtree.score(x_test, y_test)
print("准确率:", score)
特征重要性: {'sepal length (cm)': 0.02146946564885496, 'sepal width (cm)': 0.02146946564885496, 'petal length (cm)': 0.5719647633364664, 'petal width (cm)': 0.3850963053658237}
准确率: 0.9555555555555556
from IPython.display import Image
from sklearn import tree
import pydotplus
# Export the fitted tree as Graphviz DOT text (out_file=None), labelled with
# the dataset's feature and target names.
export_options = {
    "out_file": None,
    "feature_names": data_sets.feature_names,
    "class_names": data_sets.target_names,
    "filled": True,
    "rounded": True,
    "special_characters": True,
}
dot_data = tree.export_graphviz(dtree, **export_options)

# Parse the DOT text with pydotplus and wrap the PNG it produces in an
# IPython Image; as the last expression of a notebook cell this displays it.
graph = pydotplus.graph_from_dot_data(dot_data)
Image(graph.create_png())
