描述: https://github.com/schmit/cme193-ipython-notebooks-lecture/blob/master/Exercises.ipynb
代码:
# Anscombe's quartet exercise: print per-dataset summary statistics and
# x/y correlations, then draw one linear-regression plot per dataset.
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf

sns.set_context("talk")

anascombe = pd.read_csv("data/anscombe.csv")
anascombe.head()  # notebook leftover: the result is discarded when run as a script

# Summary statistics per dataset. The aggregations are named by string
# (instead of np.sum / np.mean / np.var) so pandas uses its own groupby
# implementations: 'var' is the sample variance (ddof=1), and newer pandas
# versions deprecate passing the NumPy callables here.
summary_stats = [len, 'sum', 'mean', 'var']
print(anascombe.groupby('dataset')['x'].agg(summary_stats))
print(anascombe.groupby('dataset')['y'].agg(summary_stats))

# Output prefix for each dataset's correlation line. The prefixes are
# deliberately left irregular so the printed text matches the recorded output.
corr_labels = {
    'I': 'Icorr:',
    'II': 'II:corr',
    'III': 'IIIcorr:',
    'IV': 'IV:corr',
}

# Select each dataset's rows by the 'dataset' column rather than by
# hard-coded row positions, so the result does not depend on CSV row order.
quartet = {
    name: anascombe[anascombe['dataset'] == name] for name in corr_labels
}

for name, label in corr_labels.items():
    subset = quartet[name]
    print(label + str(subset['x'].corr(subset['y'])))

# x, y and data are passed by keyword: recent seaborn releases no longer
# accept them positionally.
for subset in quartet.values():
    sns.lmplot(x='x', y='y', data=subset)

plt.show()
结果:
len sum mean var
dataset
I 11.0 99.0 9.0 11.0
II 11.0 99.0 9.0 11.0
III 11.0 99.0 9.0 11.0
IV 11.0 99.0 9.0 11.0
len sum mean var
dataset
I 11.0 82.51 7.500909 4.127269
II 11.0 82.51 7.500909 4.127629
III 11.0 82.50 7.500000 4.122620
IV 11.0 82.51 7.500909 4.123249
Icorr:0.81642051634484
II:corr0.8162365060002427
IIIcorr:0.8162867394895982
IV:corr0.8165214368885031