【发布时间】:2018-08-23 13:41:31
【问题描述】:
我一直在尝试对这个数据集应用一个函数,该函数由 AB 设计的单个研究的数据组成。它由 6 个变量组成:层级(AB 循环数,在此特定数据集中始终为 1,因为所有研究都是 AB 设计的);湾。 ID(参与者 ID 代码),c。量表(评估参与者的量表类型),d。时间(评级的情况),例如。阶段(A = 基线,B = 干预),f。分数(参与者的分数)。
library(purrr)
library(dplyr)
Tier <- rep(c(1), 36)
ID <- c("C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C2", "C2", "C2", "C2", "C2", "C2", "C2", "C2", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C1", "C2", "C2", "C2", "C2", "C2", "C2", "C2", "C2", "C2")
Scale <- rep(c(1, 2), each = 18)
Time <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 9)
Phase <- c("A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B", "A", "A", "A", "A", "B", "B", "B", "B", "A", "A", "A", "A", "B", "B", "B", "B", "A", "A", "A", "A", "B", "B", "B", "B", "B")
Score <- c(4, 8, 10, 12, 15, 7, 7, 9, 14, 15, 16, 4, 3, 2, 2, 7, 7, 9, 14, 2, 3, 6, 6, 7, 5, 9, 11, 5, 6, 3, 4, 8, 7, 9, 3, 3)
db <- data.frame(Scale, ID, Time, Phase, Score, Tier)
这是两个函数,由 Manolov 和 Rochat (2015) 开发。他们完美地工作,直到一次计算多个参与者。所以我试图改变第二个(MPD_meta)的一些细节来做到这一点。
MPD <- function(a, b) { #Manolov & Rochat, 2015
# Obtain phase length
n_a <- length(a[!is.na(a)])
n_b <- length(b[!is.na(b)])
meanA <- mean(a[!is.na(a)])
# Estimate baseline trend
base_diff <- rep(0,(n_a - 1))
for (i in 1:(n_a - 1)) base_diff[i] <- a[!is.na(a)][i + 1] - a[!is.na(a)][i]
trendA <- mean(base_diff)
# Predict baseline data
baseline_pred <- rep(0, n_a)
midX <- median(c(1:n_a))
midY <- median(a[!is.na(a)])
is.wholenumber <- function(x, tol = .Machine$double.eps ^ 0.5) abs(x - round(x)) < tol
if (is.wholenumber(midX)) {
baseline_pred[midX] <- midY
for (i in (midX - 1): 1) baseline_pred[i] <- baseline_pred[i + 1] - trendA
for (i in (midX + 1): n_a) baseline_pred[i] <- baseline_pred[i - 1] + trendA
}
#baseline_pred
if (!is.wholenumber(midX)) {
baseline_pred[midX - 0.5] <- midY - (0.5 * trendA)
baseline_pred[midX + 0.5] <- midY + (0.5 * trendA)
for (i in (midX - 1.5) : 1) baseline_pred[i] <- baseline_pred[i + 1] - trendA
for (i in (midX + 1.5) : n_a) baseline_pred[i] <- baseline_pred[i - 1] + trendA
}
# Project baseline trend to the treatment phase
treatment_pred <- rep(0, n_b)
treatment_pred[1] <- baseline_pred[n_a] + trendA
for (i in 2:n_b)
treatment_pred[i] <- treatment_pred[i - 1] + trendA
diff_absolute <- rep(0, n_b)
diff_relative <- rep(0, n_b)
for (i in 1:n_b) {
diff_absolute[i] <- b[!is.na(b)][i] - treatment_pred[i]
if (treatment_pred[i]!=0) diff_relative[i] <- ((b[!is.na(b)][i] - treatment_pred[i]) / abs(treatment_pred[i])) * 100
}
mpd_value <- mean(diff_absolute)
mpd_relative <- mean(diff_relative)
mpd_relsd <- mpd_value/sd(a[!is.na(a)])
# Compute the residual (MSE): difference between actual and predicted baseline data
residual <- 0
for (i in 1:n_a)
residual <- residual + (a[!is.na(a)][i] - baseline_pred[i]) * (a[!is.na(a)][i] - baseline_pred[i])
residual_mse <- residual/n_a
list(MPD_raw = mpd_value, Residual_MSE = residual_mse, MPD_sd = mpd_relsd) #, MPD_percent = mpd_relative, Baseline_pred = baseline_pred
}
MPD_meta <- function(DBR, n) {
MBD <- db %>%
dplyr::filter(Scale == DBR, ID == n)
# Create the objects necessary for the calculations and the graphs
# Dimensions of the data set
tiers <- max(MBD$Tier)
max.length <- max(MBD$Time)
max.score <- max(MBD$Score)
min.score <- min(MBD$Score)
# Vectors for keeping the main results
# Series lengths per tier
obs <- rep(0,tiers)
# Raw MPD effect sizes per tier
results <- rep(0,tiers)
# Weights per tier
tier.weights <- rep(0,tiers)
# Standardized MPD results per tier
stdresults <- rep(0,tiers)
# Obtain the values calling the MPD function
for (i in 1:tiers) {
tempo <- subset(MBD, Tier == i)
# Number of observations for the tier
obs[i] <- max(tempo$Time)
# Data corresponding to phase A
Atemp <- subset(tempo, Phase == "A")
Aphase <- Atemp$Score
# Data corresponding to phase B
Btemp <- subset(tempo, Phase == "B")
Bphase <- Btemp$Score
# Obtain the MPD
results[i] <- MPD(Aphase, Bphase)[[1]]
# Obtain the weight for the MPD
tier.weights[i] <- obs[i]
# Obtain the standardized-version of the MPD
stdresults[i] <- MPD(Aphase, Bphase)[[2]]
}
# Obtain the weighted average: single effect size per study
one_ES_num <- 0
for (i in 1:tiers)
one_ES_num <- one_ES_num + results[i]*tier.weights[i]
one_ES_den <- sum(tier.weights)
one_ES <- one_ES_num / one_ES_den
# Obtain the weight of the single effect size - it's a weight for the meta-analysis
# Weight 1: series length
weight <- sum(obs)
variation <- 0
if (tiers == 1) weight2 = 1
if (tiers != 1) {
for (i in 1:tiers)
variation <- variation + (stdresults[i]-one_stdES)*(stdresults[i]-one_stdES)
# Weight two: variability of the outcomes
coefvar <- (sqrt(variation/tiers))/abs(one_stdES)
weight2 <- 1/coefvar
}
# Weight
weight_std <- weight + weight2
# Obtain the standardized-version of the MPD values for each tier
# Obtain the standardized-version of the weighted average
one_stdES_num <- 0
one_stdES_den <- 0
for (i in 1:tiers)
if (abs(stdresults[i])!=Inf)
{
one_stdES_num <- one_stdES_num + stdresults[i]*tier.weights[i]
one_stdES_den <- one_stdES_den + tier.weights[i]
}
one_stdES <- one_stdES_num / one_stdES_den
data.frame(Id = n, ES = one_ES, stdES = one_stdES, weight = weight_std)
}
如您所见,当我将该功能应用于单个参与者时,它可以正常工作。 MPD_meta 需要两个参数,即秤的编号和参与者 ID。应用后,它会给出我想要的结果。
MPD_meta(1, "C1")
# Id ES stdES weight
#1 C1 -13.79167 0.325 7
但是,当我尝试将此函数映射到每个参与者的特定比例时,我未能获得我想要的结果。它给了我相同的行,每个参与者重复 n 次。请注意,结果仍然正确。显然,我可以说%>% unique(),它只会选择一个,但是当我处理更大的数据集时会花费很多时间。
map2_df(.x = db$Scale, .y = db$ID, ~ MPD_meta(.x, .y))
# Id ES stdES weight
#1 C1 -13.791667 0.3250000 7
#2 C1 -13.791667 0.3250000 7
#3 C1 -13.791667 0.3250000 7
#4 C1 -13.791667 0.3250000 7
#5 C1 -13.791667 0.3250000 7
#6 C1 -13.791667 0.3250000 7
#7 C1 -13.791667 0.3250000 7
【问题讨论】:
-
就您的数据框而言,
MPD的输入参数是什么? -
与两个阶段相关的分数; a = 对应于 A 阶段(基线)的数据,b = 对应于 B 阶段(干预)的数据
-
复制粘贴代码并运行
map2_df(.x = db$Scale, .y = db$ID, ~ MPD_meta(.x, .y))时,weight的值不同(12 而不是 7),知道为什么吗?