【发布时间】:2016-04-28 22:59:58
【问题描述】:
我无法通过 rpy2 从 Python 调用 R 函数 cforest(partykit 包)。我怀疑这与 here 中的问题有某种关系。在我看来,问题出在公式参数上(例如 formula = y ~ 1 + x1 + x2)。除此之外我的做法似乎都是正确的,因为我可以调用 lm 函数(stats 库),而它同样接受一个公式参数。
下面的代码展示了我尝试的做法(将 method 设为 0、1 或 2 可选择不同的调用方式,设为 3 则用于测试 lm 函数)。
# Reproduction script: fit a partykit::cforest model from Python via rpy2.
#
# `method` selects which call style is exercised:
#   0 -> define an R wrapper function `f` and call it from Python
#   1 -> call pk.cforest directly, passing the formula as a plain string
#   2 -> build an rpy2 Formula and evaluate the cforest call as R source
#   anything else -> call stats.lm with a string formula, for comparison
method = 1

import pandas as pd
import numpy as np
import subprocess
import rpy2.robjects as robjects
import rpy2.robjects.packages as rpackages
from rpy2.robjects.packages import importr
import pandas.rpy.common as com
from rpy2.robjects import Formula

# Random training data: 500 rows, 6 predictor columns and 1 response column.
X_train = np.random.rand(500,6)
y_train = np.random.rand(500,1)

# Values forwarded to cforest (ntree) and ctree_control (mtry).
ntree = 2
mtry = 5

# Handles to the R packages used below.
pk = importr('partykit')
stats = importr('stats')
base = importr('base')

# Create the dataframe in Python with labels consistent with the formulas
# below: the response is 'y' and the predictors are 'x0' .. 'x<nx-1>'.
nx = X_train.shape[1]
columns = ['y'] + ['x' + str(i) for i in range(nx)]
datatrain = pd.DataFrame(data=np.hstack((y_train, X_train)), columns=columns)

# Convert to an R dataframe.
r_datatrain = com.convert_to_r_dataframe(datatrain)

# Control object shared by the direct cforest calls (methods 1 and 2).
ctrl = pk.ctree_control(mtry = mtry)

if method == 0:
    # Define the wrapper on the R side, then fetch it and call it from Python.
    # The R source is kept verbatim (including its lack of indentation).
    robjects.r('''
f <- function(data, ntree, mtry, verbose=FALSE) {
if (verbose) {
cat("I am calling f().\n")
}
ctrl = ctree_control(mtry = mtry)
cforest(formula = y ~ ., data = data, ntree = ntree, control = ctrl)
}
''')
    r_f = robjects.r('f')
    obj = r_f(r_datatrain, ntree, mtry, True)
elif method == 1:
    # NOTE: the first argument is a plain Python string, not a Formula
    # object; cforest rejects this call with
    # `inherits(object, "formula") is not TRUE`.
    obj = pk.cforest('formula = y ~ 1 + x1 + x2', data = r_datatrain, ntree = ntree, control = ctrl)
elif method == 2:
    # NOTE: this branch models x1 ~ x2, unlike the other branches, which use
    # y as the response.
    fmla = Formula('x1 ~ x2')
    # Expose the Python-side objects under the names used in the R call text.
    env = fmla.environment
    env['ntree'] = ntree
    env['ctrl'] = ctrl
    env['r_datatrain'] = r_datatrain
    obj = robjects.r('cforest(%s, data = r_datatrain, ntree = ntree, control = ctrl)' %fmla.r_repr())
    # Alternative that was also tried (same string-formula call as method 1):
    #obj = pk.cforest("formula = y ~ 1 + x1 + x2", data = r_datatrain, ntree = ntree, control = ctrl)
else:
    # Comparison case: lm is given the same kind of string formula.
    obj = stats.lm("formula = y ~ 1 + x1 + x2", data = r_datatrain)

print(obj)
错误信息
method = 0
I am calling f().
/usr/local/lib/python2.7/dist-packages/rpy2/robjects/functions.py:106: UserWarning: Error in .cnode(1L, data, infl, inputs, weights, ctrl) :
R_ExpCovLinstat: y does not have 500 rows
res = super(Function, self).__call__(*new_args, **new_kwargs)
Traceback (most recent call last):
File "r2py_issues.py", line 47, in <module>
obj = r_f(r_datatrain, ntree, mtry, True)
File "/usr/local/lib/python2.7/dist-packages/rpy2/robjects/functions.py", line 178, in __call__
return super(SignatureTranslatedFunction, self).__call__(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/rpy2/robjects/functions.py", line 106, in __call__
res = super(Function, self).__call__(*new_args, **new_kwargs)
rpy2.rinterface.RRuntimeError: Error in .cnode(1L, data, infl, inputs, weights, ctrl) :
R_ExpCovLinstat: y does not have 500 rows
method = 1
/usr/local/lib/python2.7/dist-packages/rpy2/robjects/functions.py:106: UserWarning: Error: inherits(object, "formula") is not TRUE
res = super(Function, self).__call__(*new_args, **new_kwargs)
Traceback (most recent call last):
File "r2py_issues.py", line 50, in <module>
obj = pk.cforest('formula = y ~ 1 + x1 + x2', data = r_datatrain, ntree = ntree, control = ctrl)
File "/usr/local/lib/python2.7/dist-packages/rpy2/robjects/functions.py", line 178, in __call__
return super(SignatureTranslatedFunction, self).__call__(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/rpy2/robjects/functions.py", line 106, in __call__
res = super(Function, self).__call__(*new_args, **new_kwargs)
rpy2.rinterface.RRuntimeError: Error: inherits(object, "formula") is not TRUE
method = 2
/usr/local/lib/python2.7/dist-packages/rpy2/robjects/functions.py:106: UserWarning: Error in .cnode(1L, data, infl, inputs, weights, ctrl) :
R_ExpCovLinstat: y does not have 500 rows
res = super(Function, self).__call__(*new_args, **new_kwargs)
Traceback (most recent call last):
File "r2py_issues.py", line 58, in <module>
obj = robjects.r('cforest(%s, data = r_datatrain, ntree = ntree, control = ctrl)' %fmla.r_repr())
File "/usr/local/lib/python2.7/dist-packages/rpy2/robjects/__init__.py", line 321, in __call__
res = self.eval(p)
File "/usr/local/lib/python2.7/dist-packages/rpy2/robjects/functions.py", line 178, in __call__
return super(SignatureTranslatedFunction, self).__call__(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/rpy2/robjects/functions.py", line 106, in __call__
res = super(Function, self).__call__(*new_args, **new_kwargs)
rpy2.rinterface.RRuntimeError: Error in .cnode(1L, data, infl, inputs, weights, ctrl) :
R_ExpCovLinstat: y does not have 500 rows
【问题讨论】:
标签: python random-forest rpy2