【问题标题】:consistent matched pairs in R(R 中的一致匹配对)
【发布时间】:2015-09-06 13:57:21
【问题描述】:

这里使用 Matching 包(Link to package here)

我们可以用一个修改过的 GenMatch 示例来演示。

library(Matching)
data(lalonde)

# Introduce an ID variable so every observation can be traced after matching.
# seq_len(nrow(.)) is used rather than 1:length(.) so that an empty data frame
# would give an empty sequence instead of c(1, 0).
lalonde$ID <- seq_len(nrow(lalonde))

# Covariates that the matching is performed on.
X <- cbind(lalonde$age, lalonde$educ, lalonde$black, lalonde$hisp,
           lalonde$married, lalonde$nodegr, lalonde$u74, lalonde$u75,
           lalonde$re75, lalonde$re74)

# Variables on which balance is assessed: the covariates above plus the
# re74 * re75 product term.
BalanceMat <- cbind(lalonde$age, lalonde$educ, lalonde$black,
                    lalonde$hisp, lalonde$married, lalonde$nodegr,
                    lalonde$u74, lalonde$u75, lalonde$re75, lalonde$re74,
                    I(lalonde$re74 * lalonde$re75))

# Search for the covariate weights used by Match() below.
genout <- GenMatch(Tr = lalonde$treat, X = X, BalanceMatrix = BalanceMat,
                   estimand = "ATE", pop.size = 16, max.generations = 10,
                   wait.generations = 1)

# ties is set to FALSE so that we only get 1-1 matching.
# NOTE(review): GenMatch() above is run with estimand = "ATE" while this call
# leaves estimand at Match()'s default -- confirm that this is intended.
mout <- Match(Y = NULL, Tr = lalonde$treat, X = X,
              Weight.matrix = genout,
              replace = TRUE, ties = FALSE)

summary(mout)

# Build the "matched dataset": one row per matched treated observation and one
# row per matched control observation, in the order reported by Match().
treated <- lalonde[mout$index.treated, ]
# The pair identifier is the ID of the treated member of the pair.
treated[["Pair_ID"]] <- treated[["ID"]]

non.treated <- lalonde[mout$index.control, ]
# Row k of non.treated is the match of row k of treated, so it shares its pair.
non.treated[["Pair_ID"]] <- treated[["ID"]]

# Stack both halves and sort so that the members of a pair sit together.
matched.data <- rbind(treated, non.treated)
matched.data <- matched.data[order(matched.data[["Pair_ID"]]), ]

# ID of the non-treated observation that was paired with the observation ID 1.
with(matched.data, ID[Pair_ID == 1 & treat == 0])

我们看到对于数据,ID=1 与 ID=193 匹配

现在让我们在数据的顺序中引入一些随机化,看看我们是否得到相同的对

# Number of random re-orderings of the data to try.
n <- 500

# P1..P7 record, for every iteration, the ID of the control observation that
# was paired with the treated observations ID 1..7 respectively.
P1 <- rep(NA, n)
P2 <- rep(NA, n)
P3 <- rep(NA, n)
P4 <- rep(NA, n)
P5 <- rep(NA, n)
P6 <- rep(NA, n)
P7 <- rep(NA, n)

for (i in seq_len(n)) {
  # Randomise the row order of the data.
  lalonde <- lalonde[sample(seq_len(nrow(lalonde))), ]

  # The covariate and balance matrices MUST be rebuilt from the shuffled data.
  # Otherwise GenMatch()/Match() see rows in a different order from `lalonde`,
  # and the indices they return point at the wrong rows when used to subset
  # `lalonde` below.
  X <- cbind(lalonde$age, lalonde$educ, lalonde$black, lalonde$hisp,
             lalonde$married, lalonde$nodegr, lalonde$u74, lalonde$u75,
             lalonde$re75, lalonde$re74)
  BalanceMat <- cbind(lalonde$age, lalonde$educ, lalonde$black,
                      lalonde$hisp, lalonde$married, lalonde$nodegr,
                      lalonde$u74, lalonde$u75, lalonde$re75, lalonde$re74,
                      I(lalonde$re74 * lalonde$re75))

  genout <- GenMatch(Tr = lalonde$treat, X = X, BalanceMatrix = BalanceMat,
                     estimand = "ATE", pop.size = 16, max.generations = 10,
                     wait.generations = 1)
  mout <- Match(Y = NULL, Tr = lalonde$treat, X = X,
                Weight.matrix = genout,
                replace = TRUE, ties = FALSE)
  summary(mout)

  # Rebuild the matched dataset; Pair_ID is the ID of the treated member.
  treated <- lalonde[mout$index.treated, ]
  treated$Pair_ID <- treated$ID
  non.treated <- lalonde[mout$index.control, ]
  non.treated$Pair_ID <- treated$ID
  matched.data <- rbind(treated, non.treated)
  matched.data <- matched.data[order(matched.data$Pair_ID), ]

  # Store the control partner of treated IDs 1..7 for this iteration.
  # NOTE(review): each lookup presumes exactly one control row per pair --
  # confirm this holds for the Match() settings in use.
  P1[i] <- matched.data$ID[matched.data$Pair_ID == 1 & matched.data$treat == 0]
  P2[i] <- matched.data$ID[matched.data$Pair_ID == 2 & matched.data$treat == 0]
  P3[i] <- matched.data$ID[matched.data$Pair_ID == 3 & matched.data$treat == 0]
  P4[i] <- matched.data$ID[matched.data$Pair_ID == 4 & matched.data$treat == 0]
  P5[i] <- matched.data$ID[matched.data$Pair_ID == 5 & matched.data$treat == 0]
  P6[i] <- matched.data$ID[matched.data$Pair_ID == 6 & matched.data$treat == 0]
  P7[i] <- matched.data$ID[matched.data$Pair_ID == 7 & matched.data$treat == 0]
}

因此,该循环会重复进行 500 次匹配,P1 每次保存与 ID=1 配对的 treat==0 个案的 ID。

然后我们看看哪个P1出现得最多,通过:

# Plot the value stored in P1 at each iteration against the iteration index.
plot(1:n, P1, main="P1")

# Count how often each distinct value of P1 occurred across the n iterations.
summary(as.factor(P1))

我们看到,没有哪一个 treat==0 的个案被稳定地配对。我原本期望有一个个案(可能是 193?)总是被配对上,而不依赖于数据的顺序。因此我认为我的循环有错误。谁能指出错在哪里?或者,大家自己运行这样的循环时,是否发现无论数据顺序如何,相似的个案都会被配成一对?

【问题讨论】:

  • 我不确定是否需要在循环中调用 GenMatch;如果把它移出循环,模拟时间会显著减少

标签: r for-loop matching


【解决方案1】:

问题在于:您随机打乱了 lalonde 的顺序,但传给 GenMatch 和 Match 的输入是 X 和 BalanceMat,它们仍然保持原始顺序。当您最后构建 matched.data 时,用来取子集的索引已经不再与 lalonde 的行对应。请再试一次,但要把 X 和 BalanceMat 的赋值也放进循环中。

# Matching covariates, taken from the current row order of `lalonde`.
# deparse.level = 0 keeps the matrix free of column names.
X <- with(
  lalonde,
  cbind(age, educ, black, hisp, married, nodegr, u74, u75, re75, re74,
        deparse.level = 0)
)

# Balance matrix: the same ten covariate columns followed by the re74 * re75
# product term.
BalanceMat <- cbind(X, lalonde$re74 * lalonde$re75)

【讨论】:

  • 非常感谢朋友的帮助!!
猜你喜欢
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 2012-03-05
  • 2022-01-18
  • 2016-01-06
  • 1970-01-01
  • 1970-01-01
  • 2023-01-13
相关资源
最近更新 更多