这似乎符合要求
import pandas as pd
import numpy as np
# Goal: for every row, count the TRUE rows that immediately precede it as an
# unbroken streak (no FALSE in between) within the trailing one-week window.

# Rolling window length. Rolling windows are closed on the right and open on
# the left, so 8 days covers "today plus the 7 previous calendar days".
WINDOW = pd.Timedelta(days=8)

# Sample observations: (date, result, hand-computed expected count).
# A plain list of rows is used on purpose: wrapping mixed str/bool rows in
# np.array() coerces every cell to str, which turns the booleans into the
# strings 'True'/'False' and prevents pandas from summing them.
data = [
    ['5/1/21', True, '0 (no preceding rows)'],
    ['5/6/21', True, '1'],
    ['5/8/21', False,
     '2 (immediately preceded by streak of 2 TRUE rows in past 7 days)'],
    ['5/10/21', False, '0'],
    ['5/11/21', True, '0'],
    ['5/14/21', True, '1 (preceding row is TRUE)'],
    ['5/20/21', True,
     '1 (immediately preceded by streak of one TRUE rows in 1 week window)'],
    ['5/21/21', True,
     '2 (immediately preceded by streak of two TRUE rows in 1 week window)'],
    ['5/22/21', True,
     '2 (immediately preceded by streak of two TRUE rows in 1 week window)'],
    ['5/23/21', False,
     '3 (immediately preceded by streak of three TRUE rows in 1 week window)'],
    ['5/24/21', True, '0 (preceded by FALSE row)'],
    ['5/26/21', True,
     '1 (immediately preceded by streak of 1 TRUE row)'],
    ['5/27/21', True,
     '2 (immediately preceded by streak of 2 TRUE row)'],
]
df = pd.DataFrame(data=data, columns=['time', 'result', 'output_check'])
# Explicit month/day/2-digit-year format so parsing never depends on inference.
df['time'] = pd.to_datetime(df['time'], format='%m/%d/%y')
df = df.set_index('time')

is_true = df['result'].astype(bool)

# A row that directly follows a FALSE row starts a new streak. The first row
# has no predecessor; fill_value=True keeps it in the initial group.
follows_false = ~is_true.shift(1, fill_value=True)

# Running count of streak starts -> one integer id per streak group. Note the
# FALSE row that ends a streak stays in that streak's group, so it still sees
# the TRUE rows before it.
streak_id = follows_false.cumsum()

# Time-based rolling count of TRUE rows inside each streak group.
# group_keys=False keeps the original time index on the result; with the
# pandas >= 2.0 default (group_keys=True) the result would carry an
# (id, time) MultiIndex and could not be assigned back to df.
window_sum = (
    is_true.astype(int)
    .groupby(streak_id, group_keys=False)
    .apply(lambda grp: grp.rolling(WINDOW, closed='right').sum())
)

# The rolling sum includes the current row. A TRUE row therefore counts
# itself, so subtract one to keep only the rows that *precede* it.
df['output'] = window_sum.mask(is_true & (window_sum > 0), window_sum - 1)

df = df[['result', 'output_check', 'output']]
df
输出:
result output_check output
time
2021-05-01 True 0 (no preceding rows) 0.0
2021-05-06 True 1 1.0
2021-05-08 False 2 (immediately preceded by streak of 2 TRUE ro... 2.0
2021-05-10 False 0 0.0
2021-05-11 True 0 0.0
2021-05-14 True 1 (preceding row is TRUE) 1.0
2021-05-20 True 1 (immediately preceded by streak of one TRUE ... 1.0
2021-05-21 True 2 (immediately preceded by streak of two TRUE ... 2.0
2021-05-22 True 2 (immediately preceded by streak of two TRUE ... 2.0
2021-05-23 False 3 (immediately preceded by streak of three TRU... 3.0
2021-05-24 True 0 (preceded by FALSE row) 0.0
2021-05-26 True 1 (immediately preceded by streak of 1 TRUE row) 1.0
2021-05-27 True 2 (immediately preceded by streak of 2 TRUE row) 2.0