r 函数用于比较数据帧每一列的因子的连续水平的值答案

【问题标题】：r function to compare values for successive levels of a factor for each column of a data frame（r 函数用于比较数据框每一列中因子连续水平的值）
【发布时间】：2014-03-02 07:24:50
【问题描述】：

我有一个数据框，其中包含 9 个变量的数据，每个变量在 7 个地点 (A-G) 在几种不同的条件下进行测量。

    # Example data for the question: 28 rows, one per Condition/Site pair
    # (4 observed conditions x 7 sites), with nine numeric columns
    # Variable1..Variable9. The Condition factor declares eight levels
    # (Cond1..Cond8) although only codes 1-4 occur in the data. Site has
    # levels A..G and is stored in the row order F, G, C, E, D, B, A
    # within each condition.
    my.df <- structure(list(Condition = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Cond1", "Cond2", "Cond3", 
"Cond4", "Cond5", "Cond6", "Cond7", "Cond8"), class = "factor"), 
    Site = structure(c(6L, 7L, 3L, 5L, 4L, 2L, 1L, 6L, 7L, 3L, 
    5L, 4L, 2L, 1L, 6L, 7L, 3L, 5L, 4L, 2L, 1L, 6L, 7L, 3L, 5L, 
    4L, 2L, 1L), .Label = c("A", "B", "C", "D", "E", "F", "G"
    ), class = "factor"), Variable1 = c(0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0.00333333333333333, 0, 0.00333333333333333, 0.0233333333333333, 
    0.02, 0, 0.00333333333333333, 0.0133333333333333, 0, 0.03, 
    0.376666666666667, 0.07, 0, 0, 0.0133333333333333, 0, 0, 
    0.256666666666667, 0.16), Variable2 = c(0.04, 0.0233333333333333, 
    0.0466666666666667, 0.256666666666667, 0.02, 0.07, 0.48, 
    0.0466666666666667, 0.0766666666666667, 0.0266666666666667, 
    0.08, 0.04, 0.136666666666667, 0.15, 0.433333333333333, 0.16, 
    1.69666666666667, 0.14, 1.99666666666667, 3.66, 3.18, 0.04, 
    0.0633333333333333, 0.446666666666667, 0.0733333333333333, 
    0.54, 1.83666666666667, 2.01), Variable3 = c(7.64333333333333, 
    5.16333333333333, 16.84, 2.52333333333333, 1.35333333333333, 
    27.2666666666667, 17.36, 1.48666666666667, 0.596666666666667, 
    8.07333333333333, 4.77, 3.4, 6.86666666666667, 4.26, 23.6466666666667, 
    16.87, 42.1766666666667, 20.23, 33.03, 52.37, 50.46, 9.19333333333333, 
    5.09, 13.6833333333333, 11.4566666666667, 16.1133333333333, 
    26.59, 23.76), Variable4 = c(1.86333333333333, 1.41666666666667, 
    0.263333333333333, 0.953333333333333, 0.28, 0.323333333333333, 
    0.28, 1.85, 1.43666666666667, 1.16, 1.28, 2.48, 3.49666666666667, 
    3.79, 15.96, 16.6466666666667, 16.8166666666667, 10.0933333333333, 
    16.9666666666667, 17.5633333333333, 19.83, 6.61, 6.20333333333333, 
    5.72, 5.13, 6.78333333333333, 8.94333333333333, 9.66), Variable5 = c(23.84, 
    23.91, 14.11, 18.5633333333333, 16.8933333333333, 9.37, 11.34, 
    33.63, 35.4633333333333, 24.04, 32.3066666666667, 20.8166666666667, 
    25.4466666666667, 25.14, 24.33, 24.9766666666667, 10.5966666666667, 
    17.4333333333333, 12.99, 10.1133333333333, 10.18, 33.9166666666667, 
    32.3433333333333, 18.3666666666667, 25.8766666666667, 17.7633333333333, 
    18.7466666666667, 18.71), Variable6 = c(7.47333333333333, 
    8.04, 16.4033333333333, 17.1866666666667, 16.5533333333333, 
    3.82666666666667, 4.39, 28.6466666666667, 37.14, 27.23, 35.54, 
    17.47, 17.84, 16.43, 10.5, 14.88, 6.04, 16.45, 8.90333333333333, 
    3.75666666666667, 3.55, 32.4533333333333, 28.2366666666667, 
    17.36, 26.2766666666667, 14.5266666666667, 11.9766666666667, 
    11.11), Variable7 = c(21.6933333333333, 21.27, 8.95666666666667, 
    15.73, 9.61333333333333, 14.9166666666667, 18.07, 18.43, 
    12.0466666666667, 13.7433333333333, 9.78, 25.76, 27.4066666666667, 
    31.42, 10.4966666666667, 8.57, 8.59333333333333, 8.36, 7.97333333333333, 
    6.08, 7.03, 7.28333333333333, 11.82, 19.3533333333333, 10.1366666666667, 
    19.27, 18.9833333333333, 21.19), Variable8 = c(19.4866666666667, 
    19.9766666666667, 25.7, 21.1966666666667, 38.0266666666667, 
    36.04, 40.49, 2.54666666666667, 1.83, 11.0133333333333, 3.76, 
    13.5633333333333, 5.42, 6.16, 4.58666666666667, 5.75333333333333, 
    8.76666666666667, 13.52, 10.41, 3.95666666666667, 4.56, 1.51, 
    3.45333333333333, 12.2333333333333, 6.62333333333333, 11.9566666666667, 
    4.67, 5.18), Variable9 = c(17.97, 20.1866666666667, 17.6633333333333, 
    23.61, 17.27, 8.18, 7.6, 13.3533333333333, 11.3933333333333, 
    14.7, 12.48, 16.4766666666667, 13.3666666666667, 12.64, 10.07, 
    12.1466666666667, 5.30666666666667, 13.78, 7.7, 2.13333333333333, 
    1.12, 8.98, 12.79, 12.83, 14.4133333333333, 13.0433333333333, 
    8.00666666666667, 8.23)), .Names = c("Condition", "Site", 
"Variable1", "Variable2", "Variable3", "Variable4", "Variable5", 
"Variable6", "Variable7", "Variable8", "Variable9"), row.names = c(NA, 
-28L), class = "data.frame")

现在，对于每个变量和每个条件，我想比较每个连续站点（A 到 G）的值。如果站点 B 的 x 值大于站点 A 的值，我想将站点 B 的值替换为站点 A 的值。另外，我想创建另一列 Y，并在 Y 中输入 B 处的 x 和 A 处的 x 之间的差异（假设 B>A）。

然后我想继续对站点 B 和 C 进行相同的比较（如果 C>B，将 C 替换为 B，并将差异放在 Y 列中）。在比较所有站点的值之后，继续在每个条件下对每个变量执行相同的操作。

我想编写一个函数来自动为我完成所有这些比较。我可以使用 lapply 在 colnames 上运行该函数（为每个变量运行一次），并且我想可以在 lapply 调用内部使用 ave，在“条件”的各个子集上运行我的内部函数。逻辑判断本身非常简单，但我不知道如何取得因子连续水平（站点 A 到 G）所对应的值。基本上，内部函数（由 lapply 中的 ave 调用）大致是这样的：编辑：也许我把事情想得太复杂了，其实不需要 lapply，可以改用 by……但问题仍然存在

 # Compare each Site level with the level that follows it. The loop stops one
 # short of the last level so that the [i + 1] lookup never runs past the end
 # (an out-of-range lookup yields NA, which `if` rejects with an error).
 for (i in seq_len(max(nlevels(my.df$Site) - 1L, 0L))) {
   if (levels(my.df$Site)[i + 1] > levels(my.df$Site)[i]) {
     print(levels(my.df$Site)[i])
     # But this isn't right, because I want the *value*, not the factor
   }
 }

必须有办法做到这一点；有任何想法吗？谢谢！

【问题讨论】：

在站点之间的连续比较中，是始终使用起始值，还是——例如——如果 B 的值已被 A 的值替换，则 B 和 C 之间的比较用 B 的新值进行？
是的；这个想法是，在每个连续的站点，观察值可以减少（并将差值添加到 Y 列），但永远不会增加。
对不起，我还是不明白。当你来比较B和C的时候，是用B的起始值还是B的新值（假设它已经被A代替了，因为它比A的大）？

标签： r

【解决方案1】：

我决定试一试，尽管我不确定我是否完全理解了这个问题。

由于您想将函数应用于“条件”和“变量”的每个组合，我认为您可以将 my.df 转换为长格式，按每个“cond ~ var”组合拆分，然后对每一组应用函数。以下代码假设在每个“cond ~ var”组合中，每个“站点”只出现一次。我希望你能从中找到一些有用的东西：

# Reshape my.df from wide to long: one row per Condition/Site/measure column,
# with the measurement stored in `value` and the stacked column identified by
# `Variable` (reshape() numbers the stacked columns in order).
my_long_df <- reshape(
  my.df,
  direction = "long",
  idvar = c("Condition", "Site"),
  # Stack every column that is not an id column. Selecting by name keeps the
  # call correct even if the measure columns are not in positions 3 to 11.
  varying = list(setdiff(names(my.df), c("Condition", "Site"))),
  timevar = "Variable",
  v.names = "value"
)

# Split the long data into one data frame per Condition x Variable
# combination. drop = TRUE discards combinations that have no rows. TRUE is
# spelled out because `T` is an ordinary variable that can be reassigned.
spl_ldf <- split(
  my_long_df,
  interaction(my_long_df$Condition, my_long_df$Variable, drop = TRUE)
)

# Order one Condition-by-Variable group by site and compare neighbouring sites.
#
# x: data frame for a single group, with a `Site` column (normally a factor)
#    and a numeric `value` column.
#
# Returns x with its rows arranged in site order and two added columns:
#   Y       - value minus the value at the preceding site (NA for the first row)
#   newvals - the preceding site's value where Y > 0, otherwise the site's own
#             value (NA for the first row)
#
# Each comparison uses the original value of the preceding site, not a value
# that was replaced in an earlier step.
ff <- function(x) {
  # Take the site order from the group itself rather than from a global
  # `my.df`, so the function works on any data frame with these columns.
  site_order <- if (is.factor(x$Site)) levels(x$Site) else sort(unique(x$Site))
  x <- x[match(site_order, x$Site), ]
  x$Y <- c(NA, diff(x$value))
  prev_vals <- head(x$value, -1)
  curr_vals <- x$value[-1]
  # Where the value rose relative to the preceding site, fall back to the
  # preceding site's value; otherwise keep the site's own value.
  x$newvals <- c(NA, ifelse(x$Y[-1] > 0, prev_vals, curr_vals))
  x
}
# Run ff on every group and stack the per-group results into one data frame.
res <- do.call("rbind", lapply(spl_ldf, FUN = ff))
res[seq(29, 49), ]  # show only part of the output
#                  Condition Site Variable      value           Y    newvals
#Cond1.2.Cond1.A.2     Cond1    A        2 0.48000000          NA         NA
#Cond1.2.Cond1.B.2     Cond1    B        2 0.07000000 -0.41000000 0.07000000
#Cond1.2.Cond1.C.2     Cond1    C        2 0.04666667 -0.02333333 0.04666667
#Cond1.2.Cond1.D.2     Cond1    D        2 0.02000000 -0.02666667 0.02000000
#Cond1.2.Cond1.E.2     Cond1    E        2 0.25666667  0.23666667 0.02000000
#Cond1.2.Cond1.F.2     Cond1    F        2 0.04000000 -0.21666667 0.04000000
#Cond1.2.Cond1.G.2     Cond1    G        2 0.02333333 -0.01666667 0.02333333
#Cond2.2.Cond2.A.2     Cond2    A        2 0.15000000          NA         NA
#Cond2.2.Cond2.B.2     Cond2    B        2 0.13666667 -0.01333333 0.13666667
#Cond2.2.Cond2.C.2     Cond2    C        2 0.02666667 -0.11000000 0.02666667
#Cond2.2.Cond2.D.2     Cond2    D        2 0.04000000  0.01333333 0.02666667
#Cond2.2.Cond2.E.2     Cond2    E        2 0.08000000  0.04000000 0.04000000
#Cond2.2.Cond2.F.2     Cond2    F        2 0.04666667 -0.03333333 0.04666667
#Cond2.2.Cond2.G.2     Cond2    G        2 0.07666667  0.03000000 0.04666667
#Cond3.2.Cond3.A.2     Cond3    A        2 3.18000000          NA         NA
#Cond3.2.Cond3.B.2     Cond3    B        2 3.66000000  0.48000000 3.18000000
#Cond3.2.Cond3.C.2     Cond3    C        2 1.69666667 -1.96333333 1.69666667
#Cond3.2.Cond3.D.2     Cond3    D        2 1.99666667  0.30000000 1.69666667
#Cond3.2.Cond3.E.2     Cond3    E        2 0.14000000 -1.85666667 0.14000000
#Cond3.2.Cond3.F.2     Cond3    F        2 0.43333333  0.29333333 0.14000000
#Cond3.2.Cond3.G.2     Cond3    G        2 0.16000000 -0.27333333 0.16000000

【讨论】：