【问题标题】:Function responds differently to similar formula input(函数对相似公式输入的响应不同)
【发布时间】:2021-04-15 01:11:54
【问题描述】:

我正在使用这个函数,但是我在下面一行中得到一个错误:

  X <- cbind(`(Intercept)`=1, DATA[, c(vn$s1[1], vn$s2[-(1:2)]), F])

编辑:这行可能出错了:

  vn <- lapply(list(s1=s1, s2=s2), function(s)
    c(all.vars(s$call)[1], colnames(model.matrix(s))[-1]))

函数(数据在底部):

library(AER)
somefun <- function(s1, s2, data, type=2) {
  ## Collect the variable names used by two fitted models and pull the
  ## matching columns out of a dummy-expanded copy of `data`.
  ##
  ## Args:
  ##   s1, s2: fitted model objects that support formula() and model.matrix()
  ##           (e.g. the result of lm() or AER::tobit()).
  ##   data:   data frame holding the variables of both models.
  ##   type:   not used in this part of the function.
  ##
  ## Returns:
  ##   Invisibly (the last expression is an assignment), the column of the
  ##   dummy-expanded data named after the response of `s2`.

  ## turn factor variables into dummies
  DATA <- as.data.frame(model.matrix(phantom ~ ., transform(data, phantom = 0)))

  ## list variable names: response first, then the non-intercept coefficients.
  ## The response is taken from the model's formula, not from all.vars(s$call):
  ## when the model was fitted with a formula stored in a variable, the call
  ## only contains that variable's name, so all.vars(s$call)[1] would return
  ## the variable name instead of the response and the column lookups below
  ## would fail with "undefined columns selected".
  vn <- lapply(list(s1 = s1, s2 = s2), function(s) {
    c(all.vars(stats::formula(s))[1], colnames(model.matrix(s))[-1])
  })

  ## auxiliary model matrix
  print(vn$s1[1])
  print(vn$s2[-(1:2)])
  ## drop = FALSE keeps a data frame even if only one column is selected
  X <- cbind(`(Intercept)` = 1,
             DATA[, c(vn$s1[1], vn$s2[-(1:2)]), drop = FALSE])

  ## get y
  y <- DATA[, vn$s2[1]]
}

当我这样运行它时,它工作正常:

s1 <- AER::tobit(taxrate ~ votewon + industry + size + urbanisation + vote,
                  left=12, right=33, data=DF)
s2 <- lm(sales ~ yhat + industry + size + urbanisation + vote, data=DF)
res <- somefun(s1, s2, DF)

当我这样运行它时,它会崩溃:

form_1st <- as.formula("taxrate ~ votewon + industry + size + urbanisation + vote")
form_2nd <- as.formula("sales ~ yhat + industry + size + urbanisation + vote")
s1 <- AER::tobit(form_1st,
                  left=12, right=33, data=DF)
s2 <- lm(form_2nd , data=DF)
res <- somefun(s1, s2, DF)

Error in `[.data.frame`(DATA, , vn$s2[1]) : undefined columns selected

即使打印的列(来自函数内部)是相同的。为什么会发生这种情况,我该如何解决?

编辑:我根据答案尝试了以下几种做法,但都没有成功:

# Before creating vn
s2$call$formula <- all.vars(eval(s2$call$formula))
Error in s2$call$formula <- all.vars(eval(s2$call$formula)) : 
  object 's2' not found

# Aftercreating vn
vn[["s2"]] <- all.vars(eval(s2$call$formula))
vn[[2]] <- all.vars(eval(s2$call$formula))
Error in vn[[2]] <- all.vars(eval(s2$call$formula)) : 
object 'vn' not found

# While creating vn

vn <- lapply(list(s1=s1, s2=s2), function(s)
       c(all.vars(eval(s$call))[1], colnames(model.matrix(s))[-1]))

数据

DF <- structure(list(country = c("C", "C", "C", "C", "J", "J", "B", 
"B", "F", "F", "E", "E", "D", "D", "F", "F", "I", "I", "J", "J", 
"E", "E", "C", "C", "I", "I", "I", "I", "I", "I", "C", "C", "H", 
"H", "J", "J", "G", "G", "J", "J", "I", "I", "C", "C", "D", "D", 
"A", "A", "G", "G", "E", "E", "J", "J", "G", "G", "I", "I", "I", 
"I", "J", "J", "G", "G", "E", "E", "G", "G", "E", "E", "F", "F", 
"I", "I", "B", "B", "E", "E", "H", "H", "B", "B", "A", "A", "I", 
"I", "I", "I", "F", "F", "E", "E", "I", "I", "J", "J", "D", "D", 
"F", "F"), year = c(2005, 2010, 2010, 2005, 2005, 2010, 2010, 
2005, 2010, 2005, 2005, 2010, 2010, 2005, 2005, 2010, 2005, 2010, 
2005, 2010, 2010, 2005, 2010, 2005, 2005, 2010, 2005, 2010, 2010, 
2005, 2010, 2005, 2005, 2010, 2010, 2005, 2005, 2010, 2005, 2010, 
2005, 2010, 2005, 2010, 2010, 2005, 2005, 2010, 2010, 2005, 2010, 
2005, 2010, 2005, 2010, 2005, 2010, 2005, 2010, 2005, 2010, 2005, 
2010, 2005, 2010, 2005, 2010, 2005, 2005, 2010, 2005, 2010, 2005, 
2010, 2005, 2010, 2005, 2010, 2005, 2010, 2010, 2005, 2005, 2010, 
2005, 2010, 2010, 2005, 2010, 2005, 2010, 2005, 2005, 2010, 2005, 
2010, 2010, 2005, 2010, 2005), sales = c(15.48, 12.39, 3.72, 
23.61, 4, 31.87, 25.33, 7.64, -0.26, 2.9, 15.48, 12.39, 3.72, 
23.61, 4, 31.87, 25.33, 7.64, -0.26, 2.9, 15.48, 12.39, 3.72, 
23.61, 4, 31.87, 25.33, 7.64, -0.26, 2.9, 15.48, 12.39, 3.72, 
23.61, 4, 31.87, 25.33, 7.64, -0.26, 2.9, 15.48, 12.39, 3.72, 
23.61, 4, 31.87, 25.33, 7.64, -0.26, 2.9, 15.48, 12.39, 3.72, 
23.61, 4, 31.87, 25.33, 7.64, -0.26, 2.9, 15.48, 12.39, 3.72, 
23.61, 4, 31.87, 25.33, 7.64, -0.26, 2.9, 15.48, 12.39, 3.72, 
23.61, 4, 31.87, 25.33, 7.64, -0.26, 2.9, 15.48, 12.39, 3.72, 
23.61, 4, 31.87, 25.33, 7.64, -0.26, 2.9, 15.48, 12.39, 3.72, 
23.61, 4, 31.87, 25.33, 7.64, -0.26, 2.9), industry = c("D", 
"D", "E", "E", "F", "F", "F", "F", "D", "D", "E", "E", "D", "D", 
"E", "E", "F", "F", "F", "F", "D", "D", "F", "F", "E", "E", "D", 
"D", "D", "D", "E", "E", "F", "F", "D", "D", "E", "E", "E", "E", 
"D", "D", "E", "E", "D", "D", "D", "D", "E", "E", "D", "D", "F", 
"F", "D", "D", "D", "D", "E", "E", "D", "D", "E", "E", "D", "D", 
"D", "D", "D", "D", "F", "F", "F", "F", "E", "E", "D", "D", "E", 
"E", "F", "F", "E", "E", "F", "F", "E", "E", "F", "F", "D", "D", 
"D", "D", "D", "D", "D", "D", "F", "F"), urbanisation = c("B", 
"B", "A", "A", "B", "B", "A", "A", "C", "C", "C", "C", "A", "A", 
"B", "B", "C", "C", "A", "A", "C", "C", "B", "B", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "C", "C", "B", "B", "B", "B", 
"B", "B", "C", "C", "A", "A", "B", "B", "B", "B", "A", "A", "B", 
"B", "A", "A", "A", "A", "B", "B", "C", "C", "A", "A", "C", "C", 
"A", "A", "B", "B", "A", "A", "B", "B", "B", "B", "B", "B", "C", 
"C", "A", "A", "A", "A", "A", "A", "A", "A", "C", "C", "A", "A", 
"B", "B", "A", "A", "B", "B", "B", "B"), size = c(1, 1, 5, 5, 
5, 5, 1, 1, 1, 1, 5, 5, 5, 5, 2, 2, 2, 2, 5, 5, 1, 1, 1, 1, 5, 
5, 5, 5, 4, 4, 5, 5, 5, 5, 4, 4, 2, 2, 5, 5, 1, 1, 1, 1, 2, 2, 
1, 1, 2, 2, 5, 5, 1, 1, 3, 3, 2, 2, 2, 2, 5, 5, 4, 4, 1, 1, 5, 
5, 2, 2, 5, 5, 2, 2, 2, 2, 4, 4, 3, 3, 4, 4, 3, 3, 3, 3, 3, 3, 
5, 5, 3, 3, 2, 2, 3, 3, 1, 1, 5, 5), base_rate = c(14L, 14L, 
14L, 14L, 19L, 19L, 30L, 30L, 20L, 20L, 29L, 29L, 20L, 20L, 20L, 
20L, 24L, 24L, 19L, 19L, 29L, 29L, 14L, 14L, 24L, 24L, 24L, 24L, 
24L, 24L, 14L, 14L, 17L, 17L, 19L, 19L, 33L, 33L, 19L, 19L, 24L, 
24L, 14L, 14L, 20L, 20L, 23L, 23L, 33L, 33L, 29L, 29L, 19L, 19L, 
33L, 33L, 24L, 24L, 24L, 24L, 19L, 19L, 33L, 33L, 29L, 29L, 33L, 
33L, 29L, 29L, 20L, 20L, 24L, 24L, 30L, 30L, 29L, 29L, 17L, 17L, 
30L, 30L, 23L, 23L, 24L, 24L, 24L, 24L, 20L, 20L, 29L, 29L, 24L, 
24L, 19L, 19L, 20L, 20L, 20L, 20L), taxrate = c(12L, 14L, 14L, 
12L, 21L, 18L, 30L, 30L, 20L, 20L, 29L, 30L, 20L, 20L, 20L, 20L, 
24L, 24L, 21L, 18L, 30L, 29L, 14L, 12L, 24L, 24L, 24L, 24L, 24L, 
24L, 14L, 12L, 18L, 19L, 18L, 21L, 33L, 32L, 21L, 18L, 24L, 24L, 
12L, 14L, 20L, 20L, 22L, 25L, 32L, 33L, 30L, 29L, 18L, 21L, 32L, 
33L, 24L, 24L, 24L, 24L, 18L, 21L, 32L, 33L, 30L, 29L, 32L, 33L, 
29L, 30L, 20L, 20L, 24L, 24L, 30L, 30L, 29L, 30L, 18L, 19L, 30L, 
30L, 22L, 25L, 24L, 24L, 24L, 24L, 20L, 20L, 30L, 29L, 24L, 24L, 
21L, 18L, 20L, 20L, 20L, 20L), vote = c(0, 0, 0, 0, 1, 1, 1, 
0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 
1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 
1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 
1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 
1, 0, 1, 1, 1, 1, 0, 1, 1), votewon = c(0, 0, 0, 0, 1, 0, 1, 
0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 
1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 
0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 
1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 
0, 0, 1, 1, 0, 1, 0, 1, 1)), class = "data.frame", row.names = c(NA, 
-100L))

## convert variables to factors beforehand
DF[c(1, 2, 4, 5, 6, 9, 10)] <- lapply(DF[c(1, 2, 4, 5, 6, 9, 10)], factor)

【问题讨论】:

    标签: r function syntax formula


    【解决方案1】:

    问题:如果您把保存公式的变量(而不是公式本身)传给建模函数,模型对象的 foo$call$formula 中存储的只是该变量的名称,而不是公式本身,因此 all.vars(foo$call) 得到的是变量名而不是公式中的变量。这就是导致您的错误的原因。您可以改为调用 all.vars(eval(foo$call$formula)) 来修复它,这是一个示例:

    # Store the formula in a variable so it can be passed to lm() by name.
    form <- formula(y~x)
    # lm1 is given the formula literally; lm2 is given it through `form`.
    lm1 <- lm(y~x, data.frame(y = rnorm(10), x = rnorm(10)))
    lm2 <- lm(form, data.frame(y = rnorm(10), x = rnorm(10)))
    
    all.vars(lm1$call)
    # returns:  "y" "x"
    all.vars(lm2$call)
    # returns: "form"
    
    lm1$call$formula
    # returns: y ~ x
    lm2$call$formula
    # returns: form
    
    eval(lm1$call$formula)
    # returns: y ~ x
    eval(lm2$call$formula)
    # returns: y ~ x
    
    all.vars(eval(lm1$call$formula))
    # returns:  "y" "x"
    all.vars(eval(lm2$call$formula))
    # returns:  "y" "x"
    

    【讨论】:

    • 非常感谢您的回答!我以为您的回答意味着我可以在创建 vn 之前,把 s1$call$formula <- all.vars(eval(s1$call$formula)) 和 s2$call$formula <- all.vars(eval(s2$call$formula)) 添加到函数中,但显然我误解了您的答案。我应该如何将其放入函数中?
    • 顺便说一句,您的回答中有一个轻微的错字。您在最后一部分中两次调用lm1
    • 我认为问题是我必须在 ## turn factor variables into dummies 之前调整名称。但不知怎的,我总是出错。
    • 如果您将代码中的c(all.vars(s$call)[1], colnames(model.matrix(s))[-1])) 替换为c(all.vars(eval(s$call$formula))[1], colnames(model.matrix(s))[-1])),它将运行。
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2021-07-21
    • 2019-10-16
    • 2019-01-15
    • 1970-01-01
    • 1970-01-01
    • 2020-06-12
    相关资源
    最近更新 更多