【发布时间】:2019-09-24 08:46:51
【问题描述】:
我有一本字典,我试图在其中循环以在 pandas 数据帧上运行一个函数。该功能允许用户手动选择 GLM 中唯一变量的级别以设置为参考/统一值。该功能在手动运行时起作用。使用字典时出现broadcast 错误。
下面是一些示例代码:
import pandas as pd
import numpy as np
#function below
def rebase1(df, variable1, unity_value):
"""
rebase the factors according to where the modeler wants the unity to be
"""
if ('factor_rebased' not in df.columns): df['factor_rebased'] = 0
base_factor = df[(df['variable'] == variable1) & (df['level'] == unity_value)]['factor'].values
filters = df['variable'].eq(variable1)
if filters.any():
df.loc[filters, 'factor_rebased'] = df.loc[filters, 'factor']/base_factor
#Sample data below
df3 = {'variable': ['intercept', 'CLded_model','CLded_model','CLded_model','CLded_model','CLded_model','CLded_model','CLded_model'
,'married_age','married_age','married_age', 'class_cc', 'class_cc', 'class_cc', 'class_cc', 'class_v_age'
,'class_v_age','class_v_age', 'class_v_age'],
'level': [None,0,100,200,250,500,750,1000, 60, 61, 62, 100, 1200, 1500, 100
,10, 20, 15, 10],
'value': [None, 460955.7793,955735.0532,586308.4028,12216916.67,48401773.87,1477842.472,14587994.92,10493740.36,36388470.44
,31805316.37, 123.4, 4546.50, 439854.23, 2134.4, 2304.5, 2032.30, 159.80, 22],
'coefficient': [-2.36E-14, 0.00174356, 0.00174356, 0.00174356, 0.00174356, 0.00174356 ,0.00174356 , 0.00174356
,-1.004648e-02, -1.004648e-02,-1.071730e-02,-1.812330e-04,-1.812330e-04,8.727980e-04,1.402564e-03
,-1.681685e-01, -8.442040e-02, -1.812330e-04, -1.465950e-01],
'factor': [ None, 1. , 1.1904793 , 1.41724097, 1.54633869,
2.39116334, 3.69754838, 5.71766211, 0.54728324, 0.5418125 ,
0.51454483, 0.98203994, 0.80454402, 3.70319885, 1.15056877,
0.1860602 , 0.18481351, 0.9972852 , 0.23085857]}
results = pd.DataFrame(df3)
results['factor'] = np.exp(results['level']*results['coefficient'])
results
#dictionary to loop through
unity_value = {'CLded_model':500, 'married_age':61, 'class_cc':1200, 'class_v_age':10}
#trying to run the loop
for key, values in unity_value.items():
rebase1(results, key, values)
以下是错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-487-0be11e7c9811> in <module>
2
3 for key, values in unity_value.items():
----> 4 rebase1(results, key, values)
<ipython-input-486-4c3fb8398702> in rebase1(df, variable1, unity_value)
19 filters = df['variable'].eq(variable1)
20 if filters.any():
---> 21 df.loc[filters, 'factor_rebased'] = df.loc[filters, 'factor']/base_factor
22
23 # why return? You already update df['factor_rebased']
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\ops.py in wrapper(left, right)
1067 rvalues = rvalues.values
1068
-> 1069 result = safe_na_op(lvalues, rvalues)
1070 return construct_result(left, result,
1071 index=left.index, name=res_name, dtype=None)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\ops.py in safe_na_op(lvalues, rvalues)
1031 try:
1032 with np.errstate(all='ignore'):
-> 1033 return na_op(lvalues, rvalues)
1034 except Exception:
1035 if is_object_dtype(lvalues):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\ops.py in na_op(x, y)
1010
1011 try:
-> 1012 result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
1013 except TypeError:
1014 if isinstance(y, (np.ndarray, ABCSeries, pd.Index)):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in evaluate(op, op_str, a, b, use_numexpr, **eval_kwargs)
203 use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b)
204 if use_numexpr:
--> 205 return _evaluate(op, op_str, a, b, **eval_kwargs)
206 return _evaluate_standard(op, op_str, a, b)
207
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_numexpr(op, op_str, a, b, truediv, reversed, **eval_kwargs)
118
119 if result is None:
--> 120 result = _evaluate_standard(op, op_str, a, b)
121
122 return result
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py in _evaluate_standard(op, op_str, a, b, **eval_kwargs)
63 _store_test_result(False)
64 with np.errstate(all='ignore'):
---> 65 return op(a, b)
66
67
ValueError: operands could not be broadcast together with shapes (4,) (2,)
【问题讨论】:
-
您的缩进已关闭。
标签: python pandas loops dictionary