如果您正在处理字符串值:
# Sample column in which 'A' and 'BB' each occur in more than one run.
s = pd.DataFrame(['A','A','A','BB','BB','CC','A','A','BB'], columns=['a'])
# Label every row "<value>_<run number>" so that a value which reappears
# later gets a different label from its earlier run.  A single flat
# comprehension avoids sum(list_of_lists, []), which re-copies the
# accumulated list once per run.
string_groups = [
    f'{value!s}_{run}'
    for run, (_, members) in enumerate(itertools.groupby(s.a))
    for value in members
]
>>> string_groups
['A_0', 'A_0', 'A_0', 'BB_1', 'BB_1', 'CC_2', 'A_3', 'A_3', 'BB_4']
# sort=False is passed so the groups are not reordered by label.
grouped = s.groupby(string_groups, sort=False).agg(list)
# Strip only the trailing "_<run number>" suffix.  Splitting once from the
# right keeps values that themselves contain "_" intact, whereas
# split('_')[0] would cut them at their first underscore.
grouped.index = grouped.index.str.rsplit('_', n=1).str[0]
>>> grouped
a
A [A, A, A]
BB [BB, BB]
CC [CC]
A [A, A]
BB [BB]
作为一个单独的函数:
def groupby_consec(df, col):
    """Group the rows of ``df`` by runs of consecutive equal values in ``col``.

    Every row is labelled ``"<value>_<run number>"``, where the run number
    counts the consecutive runs found by ``itertools.groupby``.  A value that
    reappears after a different value therefore lands in a separate group.

    Args:
        df: Object that supports ``df[col]`` and ``df.groupby(labels,
            sort=False)``, e.g. a pandas DataFrame.
        col: Key of the column whose consecutive runs define the groups.

    Returns:
        The result of ``df.groupby(labels, sort=False)``; the group keys are
        the ``"<value>_<run number>"`` label strings.
    """
    # Build the labels in one flat pass: sum(list_of_lists, []) re-copies the
    # accumulated list for every run.  The "!s" conversion applies str() just
    # like "%s" did, but also works for tuple values, which the "%" operator
    # would try to unpack as separate format arguments.
    string_groups = [
        f'{value!s}_{run}'
        for run, (_, members) in enumerate(itertools.groupby(df[col]))
        for value in members
    ]
    return df.groupby(string_groups, sort=False)