【问题标题】:partykit - Modify the terminal node of a boxplot to display y axis in the log scalepartykit - 修改箱线图的终端节点以在对数刻度中显示 y 轴
【发布时间】:2018-03-28 19:37:50
【问题描述】:

我正在尝试使用partykit 绘制由rpart 生成的回归树。生成树的代码是这样的:

library("rpart")
fit <- rpart(Price ~ Mileage + Type + Country, cu.summary)
library("partykit")
tree.2 <- as.party(fit)

plot(tree.2, type = "simple", terminal_panel = node_boxplot(tree.2,
                                                            col = "black", fill = "lightgray", width = 0.5, yscale = NULL,
                                                            ylines = 3, cex = 0.5, id = TRUE))

我正在尝试修改终端节点上的箱线图,使 y 轴处于对数刻度上。

我知道在尝试制作箱线图时,我们所要做的就是指定boxplot(data, log="y")。这就是为什么我尝试仅在使用函数boxplot 的单行中修改函数node_boxplot。但是我一直得到相同的图表。有什么我想念的吗?任何反馈将不胜感激。

node_boxplot2<-function (obj, col = "black", fill = "lightgray", bg = "white", 
          width = 0.5, yscale = NULL, ylines = 3, cex = 0.5, id = TRUE, 
          mainlab = NULL, gp = gpar()) 
{
  y <- log(obj$fitted[["(response)"]])
  stopifnot(is.numeric(y))
  if (is.null(yscale)) 
    yscale <- range(y) +c(0,0.1)* diff(range(y))
  rval <- function(node) {
    nid <- id_node(node)
    dat <- data_party(obj, nid)
    yn <- dat[["(response)"]]
    wn <- dat[["(weights)"]]
    if (is.null(wn)) 
      wn <- rep(1, length(yn))
    x <- boxplot(rep.int(yn, wn),plot = FALSE)
    top_vp <- viewport(layout = grid.layout(nrow = 2, ncol = 3, 
                                            widths = unit(c(ylines, 1, 1), c("lines", "null", 
                                                                             "lines")), heights = unit(c(1, 1), c("lines", 
                                                                                                                  "null"))), width = unit(1, "npc"), height = unit(1, 
                                                                                                                                                                   "npc") - unit(2, "lines"), name = paste("node_boxplot", 
                                                                                                                                                                                                           nid, sep = ""), gp = gp)
    pushViewport(top_vp)
    grid.rect(gp = gpar(fill = bg, col = 0))
    top <- viewport(layout.pos.col = 2, layout.pos.row = 1)
    pushViewport(top)
    if (is.null(mainlab)) {
      mainlab <- if (id) {
        function(id, nobs) sprintf("Node %s (n = %s)", 
                                   id, nobs)
      }
      else {
        function(id, nobs) sprintf("n = %s", nobs)
      }
    }
    if (is.function(mainlab)) {
      mainlab <- mainlab(names(obj)[nid], sum(wn))
    }
    grid.text(mainlab)
    popViewport()
    plot <- viewport(layout.pos.col = 2, layout.pos.row = 2, 
                     xscale = c(0, 1), yscale = yscale, name = paste0("node_boxplot", 
                                                                      nid, "plot"), clip = FALSE)
    pushViewport(plot)
    grid.yaxis()
    grid.rect(gp = gpar(fill = "transparent"))
    grid.clip()
    xl <- 0.5 - width/4
    xr <- 0.5 + width/4
    grid.lines(unit(c(xl, xr), "npc"), unit(x$stats[1], "native"), 
               gp = gpar(col = col))
    grid.lines(unit(0.5, "npc"), unit(x$stats[1:2], "native"), 
               gp = gpar(col = col, lty = 2))
    grid.rect(unit(0.5, "npc"), unit(x$stats[2], "native"), 
              width = unit(width, "npc"), height = unit(diff(x$stats[c(2, 
                                                                       4)]), "native"), just = c("center", "bottom"), 
              gp = gpar(col = col, fill = fill))
    grid.lines(unit(c(0.5 - width/2, 0.5 + width/2), "npc"), 
               unit(x$stats[3], "native"), gp = gpar(col = col, 
                                                     lwd = 2))
    grid.lines(unit(0.5, "npc"), unit(x$stats[4:5], "native"), 
               gp = gpar(col = col, lty = 2))
    grid.lines(unit(c(xl, xr), "npc"), unit(x$stats[5], "native"), 
               gp = gpar(col = col))
    n <- length(x$out)
    if (n > 0) {
      index <- 1:n
      if (length(index) > 0) 
        grid.points(unit(rep.int(0.5, length(index)), 
                         "npc"), unit(x$out[index], "native"), size = unit(cex, 
                                                                           "char"), gp = gpar(col = col))
    }
    upViewport(2)
  }
  return(rval)
}

【问题讨论】:

    标签: r tree boxplot party


    【解决方案1】:

    (1) 如果在对数尺度上绘图更合适,那么我通常认为在对数尺度上生长树也更好。在这里,您可以简单地使用rpart(log(Price) ~ ...)

    (2) 如果您只想在节点箱线图中绘制不同的比例,则需要做更多的工作,因为箱线图是使用grid.*() 函数“手动”绘制的。在下面的代码中,我通过获取日志来转换整体响应和节点中的响应以进行绘制。然后我只是根据需要修改grid.yaxis()。函数node_logboxplot() 只是node_boxplot() 的一个副本,经过一些简单的修改(标记为#!!#)。有了这个,你可以做到

    plot(tree.2, terminal_panel = node_logboxplot)
    

    相比

    plot(tree.2, terminal_panel = node_boxplot)
    

    修改面板功能:

    node_logboxplot <- function(obj,
                             col = "black",
                     fill = "lightgray",
                 bg = "white",
                     width = 0.5,
                     yscale = NULL,
                     ylines = 3,
                 cex = 0.5,
                     id = TRUE,
                             mainlab = NULL, 
                 gp = gpar())
    {
        y <- log(obj$fitted[["(response)"]]) #!!# log-transform overall response
        stopifnot(is.numeric(y))
    
        if (is.null(yscale)) 
            yscale <- range(y) + c(-0.1, 0.1) * diff(range(y))
    
        #!!# compute yaxis labels on original scale
        yaxis <- pretty(exp(y))
        yaxis <- yaxis[yaxis > 0]
    
        ### panel function for boxplots in nodes
        rval <- function(node) {
    
            ## extract data
        nid <- id_node(node)
        dat <- data_party(obj, nid)
        yn <- log(dat[["(response)"]]) #!!# log-transform response in node
        wn <- dat[["(weights)"]]
        if(is.null(wn)) wn <- rep(1, length(yn))
    
            ## parameter setup
        x <- boxplot(rep.int(yn, wn), plot = FALSE)
    
            top_vp <- viewport(layout = grid.layout(nrow = 2, ncol = 3,
                               widths = unit(c(ylines, 1, 1), 
                                             c("lines", "null", "lines")),  
                               heights = unit(c(1, 1), c("lines", "null"))),
                               width = unit(1, "npc"), 
                               height = unit(1, "npc") - unit(2, "lines"),
                   name = paste("node_boxplot", nid, sep = ""),
                   gp = gp)
    
            pushViewport(top_vp)
            grid.rect(gp = gpar(fill = bg, col = 0))
    
            ## main title
            top <- viewport(layout.pos.col=2, layout.pos.row=1)
            pushViewport(top)
            if (is.null(mainlab)) { 
          mainlab <- if(id) {
            function(id, nobs) sprintf("Node %s (n = %s)", id, nobs)
          } else {
            function(id, nobs) sprintf("n = %s", nobs)
          }
            }
        if (is.function(mainlab)) {
              mainlab <- mainlab(names(obj)[nid], sum(wn))
        }
            grid.text(mainlab)
            popViewport()
    
            plot <- viewport(layout.pos.col = 2, layout.pos.row = 2,
                             xscale = c(0, 1), yscale = yscale,
                 name = paste0("node_boxplot", nid, "plot"),
                 clip = FALSE)
    
            pushViewport(plot)
    
            grid.yaxis(at = log(yaxis), label = yaxis) #!!# use pre-computed axis labels
            grid.rect(gp = gpar(fill = "transparent"))
        grid.clip()
    
        xl <- 0.5 - width/4
        xr <- 0.5 + width/4
    
            ## box & whiskers
            grid.lines(unit(c(xl, xr), "npc"), 
                       unit(x$stats[1], "native"), gp = gpar(col = col))
            grid.lines(unit(0.5, "npc"), 
                       unit(x$stats[1:2], "native"), gp = gpar(col = col, lty = 2))
            grid.rect(unit(0.5, "npc"), unit(x$stats[2], "native"), 
                      width = unit(width, "npc"), height = unit(diff(x$stats[c(2, 4)]), "native"),
                      just = c("center", "bottom"), 
                      gp = gpar(col = col, fill = fill))
            grid.lines(unit(c(0.5 - width/2, 0.5+width/2), "npc"), 
                       unit(x$stats[3], "native"), gp = gpar(col = col, lwd = 2))
            grid.lines(unit(0.5, "npc"), unit(x$stats[4:5], "native"), 
                       gp = gpar(col = col, lty = 2))
            grid.lines(unit(c(xl, xr), "npc"), unit(x$stats[5], "native"), 
                       gp = gpar(col = col))
    
            ## outlier
            n <- length(x$out)
            if (n > 0) {
                index <- 1:n ## which(x$out > yscale[1] & x$out < yscale[2])
                if (length(index) > 0)
                    grid.points(unit(rep.int(0.5, length(index)), "npc"), 
                                unit(x$out[index], "native"),
                                size = unit(cex, "char"), gp = gpar(col = col))
            }
    
            upViewport(2)
        }
    
        return(rval)
    }
    class(node_logboxplot) <- "grapcon_generator"
    

    【讨论】:

    • 感谢您的回答!我绝对需要仔细查看所有 grid.*() 函数。
    • 它们与相应的基本图形函数非常相似,但参数有所简化。此外,您还可以在不同的视口中使用不同的坐标比例,这有助于更复杂的显示。
    猜你喜欢
    • 2022-10-09
    • 2018-06-12
    • 2015-08-19
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多