【问题标题】:Fit a generalized linear model (glm) with a categorical variable of month using the function monthglm() in R使用 R 中的函数 monthglm() 拟合具有月份分类变量的广义线性模型 (glm)
【发布时间】:2020-12-02 11:51:30
【问题描述】:

问题

我有一个数据框(见下文),我想使用基于季节和年份协变量的函数 monthglm() 来拟合具有月份分类变量的一般线性模型 (glm)。

在我运行以下由Barnett, A.G., Dobson, A.J. (2010) Analysing Seasonal Health Data. Springer.(见下文)编写的函数后,我不断收到此错误消息。

如果有人能提供帮助,我将不胜感激。

加载包

library(season)
library(MASS) # for mvrnorm
library(survival) # for coxph
library(ggplot2)

功能:

monthglm<-function(formula,data,family=gaussian(),refmonth=1,
                   monthvar='month',offsetmonth=FALSE,offsetpop=NULL){
  ## checks
  if (refmonth<1|refmonth>12){stop("Reference month must be between 1 and 12")}
  ## original call with defaults (see amer package)
  ans <- as.list(match.call())
  frmls <- formals(deparse(ans[[1]]))
  add <- which(!(names(frmls) %in% names(ans)))
  call<-as.call(c(ans, frmls[add]))
  
  monthvar=with(data,get(monthvar))
  cmonthvar=class(monthvar)
  ## If month is a character, create the numbers
  if(cmonthvar%in%c('factor','character')){
    if(cmonthvar=='character'){
      if(max(nchar(monthvar))==3){mlevels=substr(month.name,1,3)}else{mlevels=month.name}
      monthvar=factor(monthvar,levels=mlevels)
    }
    months=as.numeric(monthvar)
    data$month=months # add to data for flagleap
    months=as.factor(months)
    levels(months)[months]<-month.abb[months]
    months<-relevel(months.u,ref=month.abb[refmonth]) # set reference month
  }
  ## Transform month numbers to names
  if(cmonthvar%in%c('integer','numeric')){
    months.u<-as.factor(monthvar)  
    nums<-as.numeric(nochars(levels(months.u))) # Month numbers
    levels(months.u)[nums]<-month.abb[nums]
    months<-relevel(months.u,ref=month.abb[refmonth]) # set reference month
  }
  ## prepare data/formula
  parts<-paste(formula)
  f<-as.formula(paste(parts[2],parts[1],parts[3:length(formula)],'+months'))
  dep<-parts[2] # dependent variable
  days<-flagleap(data=data,report=FALSE,matchin=T) # get the number of days in each month
  l<-nrow(data)
  if(is.null(offsetpop)==FALSE){poff=with(data,eval(offsetpop))} else{poff=rep(1,l)} # 
  if(offsetmonth==TRUE){moff=days$ndaysmonth/(365.25/12)} else{moff=rep(1,l)} # days per month divided by average month length
  ###  data$off<-log(poff*moff)
  off<-log(poff*moff)  # 
  fit<-glm(formula=f,data=data,family=family,offset=off)
  ## return
  toret<-list()
  toret$call<-call
  toret$glm<-fit
  toret$fitted.values<-fitted(fit)
  toret$residuals<-residuals(fit)
  class(toret)<-'monthglm'
  return(toret)
}

#The levels of a factor must match the observed values. 
#If you want to change how those values print out, you need to change the labels. 

错误信息

model<-monthglm(formula=Frequency_Blue~Year+Monsoon_Season, family=gaussian,
+                       offsetmonth=TRUE, refmonth=1, data=Final_New_Blue)
Error in nochars(levels(months.u)) : could not find function "nochars"

数据框

   structure(list(Year = c(2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 
2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 
2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 
2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 
2015L, 2016L, 2017L), Month = structure(c(5L, 5L, 5L, 4L, 4L, 
4L, 8L, 8L, 8L, 1L, 1L, 1L, 9L, 9L, 9L, 7L, 7L, 7L, 6L, 6L, 6L, 
2L, 2L, 2L, 12L, 12L, 12L, 11L, 11L, 11L, 10L, 10L, 10L, 3L, 
3L, 3L), .Label = c("April", "August", "December", "Feb", "Jan", 
"July", "June", "Mar", "May", "November", "October", "September"
), class = "factor"), Frequency_Blue_Whales_Year_Month = c(76L, 
78L, 66L, 28L, 54L, 37L, 39L, 31L, 88L, 46L, 23L, 54L, 5L, 8L, 
0L, 0L, 0L, 0L, 0L, 4L, 7L, 22L, 6L, 44L, 10L, 30L, 35L, 88L, 
41L, 35L, 4L, 30L, 43L, 65L, 43L, 90L), Season = structure(c(4L, 
4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
5L, 5L, 5L), .Label = c("Autumn", "Spring", "Summer", "winter", 
"Winter"), class = "factor")), class = "data.frame", row.names = c(NA, 
-36L))

参数

【问题讨论】:

  • 我没有从你的例子中得到同样的错误。由于您的代码未指定monthvar,并且该函数搜索默认值month,因此我收到错误消息。如果我添加该参数,我会收到关于缺失值months.u 的不同错误
  • nochars 函数是 season 包的内部函数,因此您收到该错误令人惊讶。你确定你加载了包?您是为这篇文章手动输入功能代码,还是您是这样运行代码的?
  • 我再次尝试安装季节:这是错误消息:库(季节)正在加载所需的包:ggplot2 正在加载所需的包:MASS 正在加载所需的包:生存附加包:'season' 以下对象是屏蔽 by '.GlobalEnv': monthglm 警告消息:包'season' 是在 R 版本 4.0.2 下构建的
  • Whale_model
  • 如果我将monthvar = 'Month' 添加到模型中,这就是上面的错误消息。你知道这里有什么问题吗?我对 R 比较陌生

标签: r function regression glm


【解决方案1】:

通过以下对代码和函数调用的简单更改,我能够让模型运行。我将其命名为monthglm2,以区别于包函数。通过调用您的数据df

library(season)
library(MASS) # for mvrnorm
library(survival) # for coxph
library(ggplot2)


monthglm2<-function(formula,data,family=gaussian(),refmonth=1,
                   monthvar='month',offsetmonth=FALSE,offsetpop=NULL){
  ## checks
  if (refmonth<1|refmonth>12){stop("Reference month must be between 1 and 12")}
  ## original call with defaults (see amer package)
  ans <- as.list(match.call())
  frmls <- formals(deparse(ans[[1]]))
  add <- which(!(names(frmls) %in% names(ans)))
  call<-as.call(c(ans, frmls[add]))

  monthvar=with(data,get(monthvar))
  cmonthvar=class(monthvar)
  ## If month is a character, create the numbers
  if(cmonthvar%in%c('factor','character')){
    if(cmonthvar=='character'){
      if(max(nchar(monthvar))==3){mlevels=substr(month.name,1,3)}else{mlevels=month.name}
      monthvar=factor(monthvar,levels=mlevels)
    }
    months=as.numeric(monthvar)
    data$month=months # add to data for flagleap
    months=as.factor(months)
    levels(months)[months]<-month.abb[months]
    months<-relevel(months,ref=month.abb[refmonth]) # set reference month ### TYPO HERE, changed from months.u
  }
  ## Transform month numbers to names
  if(cmonthvar%in%c('integer','numeric')){
    months.u<-as.factor(monthvar)
    nums<-as.numeric(nochars(levels(months.u))) # Month numbers
    levels(months.u)[nums]<-month.abb[nums]
    months<-relevel(months.u,ref=month.abb[refmonth]) # set reference month
  }
  ## prepare data/formula
  parts<-paste(formula)
  f<-as.formula(paste(parts[2],parts[1],parts[3:length(formula)],'+months'))
  dep<-parts[2] # dependent variable
  days<-flagleap(data=data,report=FALSE,matchin=T) # get the number of days in each month
  l<-nrow(data)
  if(is.null(offsetpop)==FALSE){poff=with(data,eval(offsetpop))} else{poff=rep(1,l)} #
  if(offsetmonth==TRUE){moff=days$ndaysmonth/(365.25/12)} else{moff=rep(1,l)} # days per month divided by average month length
  ###  data$off<-log(poff*moff)
  off<-log(poff*moff)  #
  fit<-glm(formula=f,data=data,family=family,offset=off)
  ## return
  toret<-list()
  toret$call<-call
  toret$glm<-fit
  toret$fitted.values<-fitted(fit)
  toret$residuals<-residuals(fit)
  class(toret)<-'monthglm'
  return(toret)
}


df$year <- df$Year
monthglm2(formula=Frequency_Blue_Whales_Year_Month~Year+Season, family=gaussian(),  offsetmonth=TRUE, monthvar='Month', refmonth=1, data=df)

函数中存在另一个问题,我必须将列重命名为 year。如果您查看此软件包的 github,则只有一个贡献者并且没有提出任何问题。使用这样的包有利有弊:它们可能具有有用的新颖方法,但无法快速识别和解决错误。如果您继续进行季节性分析,我建议您尝试直接学习如何在 glm 中包含季节性建模

【讨论】:

  • 谢谢!我真的很感谢你的帮助。祝你有美好的一天:)
猜你喜欢
  • 1970-01-01
  • 2015-07-21
  • 2021-12-25
  • 2021-07-29
  • 1970-01-01
  • 2014-05-22
  • 2013-04-15
  • 2011-11-21
  • 1970-01-01
相关资源
最近更新 更多