pandas分组统计:groupby,melt,pivot_table,crosstab的用法
groupby: 分组
melt: 宽表转长表
pivot_table: 长表转宽表,数据透视表
crosstab: 交叉表 / 列联表,主要用于分组频数统计
import numpy as np
import pandas as pd
df = pd.DataFrame({\'key1\':[\'a\',\'a\',\'b\',\'b\',\'a\'],
\'key2\':[\'one\',\'two\',\'one\',\'two\',\'one\'],
\'data1\':np.random.randn(5),
\'data2\':np.random.randn(5)
})
# key1 key2 data1 data2
#0 a one 0.498857 0.074495
#1 a two 2.872086 -1.303973
#2 b one -0.721171 -1.315390
#3 b two 0.985844 0.419780
#4 a one -0.134534 -0.732861
# ####################
\'\'\'groupby 用法\'\'\'
group1 = df.groupby(\'key1\')
group2 = df.groupby([\'key1\',\'key2\'])
[x for x in group1]
group1.size()
group1.sum()
group2.count()
group1[\'data1\',\'data2\'].agg([\'mean\',\'sum\']) #作用于所有列
group2([\'key1\',\'key2\']).apply(lambda x: pd.Series([x.shape[0], x[\'key1\'].mean(), x[\'key2\'].sum()],
index=[\'counts\', \'key1_mean\', \'key2_sum\'])) #作用于指定列
# ####################
\'\'\' melt 用法 -- 宽表转长表 \'\'\'
pd.melt(df, id_vars=[\'key1\', \'key2\'], value_vars=[\'data1\', \'data2\'], var_name=\'var\', value_name=\'value\') #col_level
# key1 key2 var value
#0 a one data1 0.498857
#1 a two data1 2.872086
#2 b one data1 -0.721171
#3 b two data1 0.985844
#4 a one data1 -0.134534
#5 a one data2 0.074495
#6 a two data2 -1.303973
#7 b one data2 -1.315390
#8 b two data2 0.419780
#9 a one data2 -0.732861
# ####################
\'\'\' crosstab 用法 -- 列联表(count) \'\'\'
pd.crosstab(df.key1, df.key2, margins=True)
#key2 one two All
#key1
#a 2 1 3
#b 1 1 2
#All 3 2 5
# ####################
\'\'\' pivot_table 用法 -- 长表转宽表 \'\'\'
# pd.pivot_table(data, values=None, index=None, columns=None, aggfunc=\'mean\', fill_value=None,
# dropna=True, margins=False, margins_name=\'ALL\') #aggfunc={\'d\':np.sum, \'e\':np.max}
pd.pivot_table(df, index=\'key1\', columns=\'key2\')
# data1 data2
#key2 one two one two
#key1
#a 0.182162 2.872086 -0.329183 -1.303973
#b -0.721171 0.985844 -1.315390 0.419780
df.pivot_table([\'data1\'], index=\'key1\', columns=\'key2\', fill_value=0) #[\'data1\']
# data1
#key2 one two
#key1
#a 0.182162 2.872086
#b -0.721171 0.985844
参考链接: