选项 1
写法不太直观，但不依赖任何硬编码的值。
from itertools import product

# Option 1: build every (CS, SC) pairing from the column labels themselves,
# so that no category name is hard-coded.
d_ = df.set_index('Product')
# pop() also removes 'Price' from d_, leaving only the '<prefix>_<category>'
# columns that take part in the sums.
prc = d_.pop('Price')

# 'CS_Medium' -> ('CS', 'Medium'): two-level column index (prefix, category).
d_.columns = d_.columns.str.split('_', expand=True)
c = d_.columns
l0 = c.levels[0]
l1 = c.levels[1]
# MultiIndex.labels was renamed to MultiIndex.codes (pandas 0.24) and the old
# name was removed in pandas 1.0, so use .codes here.
b0 = c.codes[0]
b1 = c.codes[1]
r0 = range(len(l0))

# For each prefix, take its categories in column order, then form the
# Cartesian product across prefixes.
ptups = list(product(*(l1[b1][b0 == i] for i in r0)))

# Target columns: one (prefix, *combination) entry per prefix and combination.
midx = pd.MultiIndex.from_tuples(
    [(x,) + t for x in l0 for t in ptups],
    names=['key'] + l0.tolist()
)
n = midx.nlevels

# Repeat each source column once per combination it belongs to; the order
# matches midx, so the labels can simply be overwritten.
_d = d_[[(x0, x1) for x0, y1 in zip(l0, zip(*ptups)) for x1 in y1]]
_d.columns = midx

# Move the combination levels into the row index, keeping all-NaN rows.
# NOTE(review): newer pandas releases deprecate this stack implementation in
# favour of future_stack=True -- confirm against the pandas version in use.
_d = _d.stack(list(range(1, n)), dropna=False)

# Sum across prefixes treating NaN as 0, but keep NaN where every
# contributing value was missing. The axis is passed by keyword because
# pandas 2.0 no longer accepts it positionally for any().
(
    _d.fillna(0)
    .sum(axis=1)
    .where(_d.notna().any(axis=1))
    .reset_index(name='SUM_values')
)
Product CS SC SUM_values
0 R123 Medium A 0.15
1 R123 Medium B 0.15
2 R123 Medium C 0.53
3 R123 Small A NaN
4 R123 Small B NaN
5 R123 Small C 0.38
6 R234 Medium A 0.04
7 R234 Medium B NaN
8 R234 Medium C 0.05
9 R234 Small A 0.01
10 R234 Small B -0.03
11 R234 Small C 0.02
选项 2
使用 defaultdict 和 for 循环
from collections import defaultdict

# Option 2: group the category suffixes by column prefix, then build one
# output row per (product, CS category, SC category) with a comprehension.
d = defaultdict(list)
for c in df.columns:
    # 'CS_Medium' -> k='CS', v=['Medium']; columns without '_' give v == []
    # and are skipped.
    k, *v = c.split('_')
    if v:
        d[k].append(v[0])

pd.DataFrame([
    [row.Product, c, s, row.Price, row[f'CS_{c}'], row[f'SC_{s}']]
    for _, row in df.iterrows()
    for c in d['CS'] for s in d['SC']
], columns='Product CS SC Price CS_v SC_v'.split()).assign(
    # fill_value=0 treats a single missing operand as 0; the sum is NaN only
    # when both operands are missing.
    SUM_values=lambda frame: frame.CS_v.add(frame.SC_v, fill_value=0)
).drop(columns=['CS_v', 'SC_v'])  # positional axis (", 1") was removed in pandas 2.0
Product CS SC Price SUM_values
0 R123 Medium A 1.18 0.15
1 R123 Medium B 1.18 0.15
2 R123 Medium C 1.18 0.53
3 R123 Small A 1.18 NaN
4 R123 Small B 1.18 NaN
5 R123 Small C 1.18 0.38
6 R234 Medium A 0.23 0.04
7 R234 Medium B 0.23 NaN
8 R234 Medium C 0.23 0.05
9 R234 Small A 0.23 0.01
10 R234 Small B 0.23 -0.03
11 R234 Small C 0.23 0.02
选项 3
使用 defaultdict、itertools.product 和 lookup
from itertools import product
from collections import defaultdict

# Option 3: enumerate every (Product, CS, SC) combination with
# itertools.product, then fetch the matching cell values by label.
d = defaultdict(list)
for c in df.columns:
    # 'CS_Medium' -> k='CS', v=['Medium']; columns without '_' give v == []
    # and are skipped.
    k, *v = c.split('_')
    if v:
        d[k].append(v[0])

# Put the product names first so they become the first factor of the
# Cartesian product. The short orient alias 'l' was removed in pandas 2.0.
d = {**df[['Product']].to_dict('list'), **d}
d_ = df.set_index('Product')
ndf = pd.DataFrame(dict(zip(d.keys(), zip(*product(*d.values())))))

# DataFrame.lookup was deprecated in pandas 1.2 and removed in 2.0, so
# translate the row/column labels to positions and index the value array.
# get_indexer needs unique labels; the labels used here come from df itself.
# NOTE(review): assumes the non-Product columns are all numeric -- confirm.
vals = d_.to_numpy()
rows = d_.index.get_indexer(ndf.Product)
cs_cols = d_.columns.get_indexer(ndf.CS.radd('CS_'))
sc_cols = d_.columns.get_indexer(ndf.SC.radd('SC_'))
cs = pd.Series(vals[rows, cs_cols], ndf.index)
sc = pd.Series(vals[rows, sc_cols], ndf.index)

# fill_value=0 treats a single missing operand as 0; the sum is NaN only
# when both operands are missing.
ndf['SUM_values'] = cs.add(sc, fill_value=0)
ndf[['Product', 'CS', 'SC', 'SUM_values']]
Product CS SC SUM_values
0 R123 Medium A 0.15
1 R123 Medium B 0.15
2 R123 Medium C 0.53
3 R123 Small A NaN
4 R123 Small B NaN
5 R123 Small C 0.38
6 R234 Medium A 0.04
7 R234 Medium B NaN
8 R234 Medium C 0.05
9 R234 Small A 0.01
10 R234 Small B -0.03
11 R234 Small C 0.02