数据集从官网下载，
以下代码在 Jupyter Notebook 中实现。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Location of the iris data file: comma-separated text, parsed below without
# a header row.
fname = r'E:\pythonwork\project\Deeplearning\Task\data\iris.data'

# Read the file by hand: one list of comma-separated fields per line.
# The file is only read, so plain 'r' is used instead of read/write 'r+'.
# rstrip('\n') removes just the line terminator (a blind [:-1] would eat a
# real character if the last line has no trailing newline), and blank lines
# are skipped so they never become rows.
with open(fname, 'r', encoding='utf-8') as f:
    s = [line.rstrip('\n').split(',') for line in f if line.strip()]

# Build the DataFrame; every field is still a string at this point.
names = ['slength', 'swidth', 'plength', 'pwidth', 'name']
iris = pd.DataFrame(data=s, columns=names)

# Guard against incomplete rows (short rows are padded with missing values).
iris.dropna(axis=0, how='any', inplace=True)

# Convert the four measurement columns from strings to floats BEFORE taking
# the per-class slices, so the slices hold numbers as well. Assigning whole
# columns (rather than through .iloc) replaces them and gives a real float
# dtype instead of writing floats into the existing object columns.
feature_cols = names[:4]
iris[feature_cols] = iris[feature_cols].astype('float')

# Three classes, taken as consecutive blocks of 50 rows each.
# NOTE(review): assumes the file lists the classes in that order with exactly
# 50 samples per class, as the original author's comment states.
seto = iris.iloc[0:50, :]
vers = iris.iloc[50:100, :]
virg = iris.iloc[100:150, :]

# Notebook-style inspections: bare expressions, shown only when they are the
# last statement of a notebook cell.
seto.shape
vers.shape

# Count how many samples each class has.
iris['name'].value_counts()

# Scatter plot of slength against swidth for the whole data set.
plt.scatter(x=iris['slength'], y=iris['swidth'])
plt.show()
# -------------------
# Scatter plot of slength against swidth, one colour/marker per class.
# Each class must be plotted against its OWN swidth column; using
# seto['swidth'] for every series would pair x-values of one class with
# y-values of another and draw a meaningless picture.
plt.scatter(x=seto['slength'], y=seto['swidth'], color='red')
plt.scatter(x=vers['slength'], y=vers['swidth'], color='blue', marker="+")
plt.scatter(x=virg['slength'], y=virg['swidth'], color='green', marker='*')
plt.xlabel('s length')
plt.ylabel('s width')
plt.show()