pivot_table() crosstab()

1. pivot_table() 函数：根据一个或多个键对数据进行分组聚合，默认的聚合函数是求均值（mean）。
import pandas as pd
import numpy as np
# Sample data: grouping keys in columns A, B and C, numeric values in D and E.
df = pd.DataFrame({
    "A": ["foo", "foo", "foo", "foo", "foo",
          "bar", "bar", "bar", "bar"],
    "B": ["one", "one", "one", "two", "two",
          "one", "one", "two", "two"],
    "C": ["small", "large", "large", "small",
          "small", "large", "small", "small",
          "large"],
    "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
    "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
})

print(df)

# Parameters of pivot_table:
# data          DataFrame to pivot. Only for the top-level pd.pivot_table();
#               the DataFrame.pivot_table() method used below works on `df`.
# values        Column name(s) to aggregate. Default None.
# index         str or list. Keys to group by on the rows. Default None.
# columns       str or list. Keys to group by on the columns. Default None.
# aggfunc       Function, list of functions, dict or str alias. Aggregation
#               to apply to each group. Default "mean".
# fill_value    Scalar. Value that replaces missing values in the resulting
#               table. Default None.
# margins       bool. When True, subtotal/grand-total row and column named
#               "All" are added. Default False.
# dropna        bool. When True, columns whose entries are all NaN are not
#               included. Default True.
# margins_name  str. Name of the totals row/column when margins is True.
#               Default "All".
table = df.pivot_table(
    values="D",         # aggregate column D only
    index=["A", "B"],   # row index keys
    columns=["C"],      # column index key
    # Use the string alias rather than np.sum: passing the NumPy callable is
    # deprecated in pandas >= 2.1 and raises a FutureWarning; "sum" yields the
    # same result.
    aggfunc="sum",
)
print(table)
#      A    B      C  D  E
# 0  foo  one  small  1  2
# 1  foo  one  large  2  4
# 2  foo  one  large  2  5
# 3  foo  two  small  3  5
# 4  foo  two  small  3  6
# 5  bar  one  large  4  6
# 6  bar  one  small  5  8
# 7  bar  two  small  6  9
# 8  bar  two  large  7  9
# C        large  small
# A   B                
# bar one    4.0    5.0
#     two    7.0    6.0
# foo one    4.0    1.0
#     two    NaN    6.0
View Code