【问题标题】:R function stops running with no visible errorsR 函数停止运行,没有可见的错误
【发布时间】:2020-09-14 19:00:15
【问题描述】:

我编写了一个自定义函数来读取 JSON 文件并提取我需要的所有相关信息,目标是在目录中的所有文件上运行它。我已经创建了所有文件的字符向量并使用 sapply/lapply,我已经能够运行如下函数。

setwd("/directory/")
file.list=dir()
sapply(file.list,function)

在执行过程中由于某种原因,它到达了输出以下内容并无缘无故停止的地步,如下所示。我有警告被禁止,我得到的唯一警告是我所期望的。在所有失败的文件上单独运行解析器,给我我想查看的表。

#expected output
#expected output

$'filename'
[1]FALSE
$'filename'
[1]NULL

如果有帮助,我已经在此处附加了我的解析器,我确信它没有经过优化并且有更好的方法可以做到这一点,但速度不是这里的主要问题。提前致谢!

library(jsonlite)
library(data.table)
library(dplyr)
library(plyr)
library(stringr)
library(tidyr)

trialParse=function(filename){
  options(warn=-1)
  options(max.print=99999)
  parsefile=read_json(filename)
  title=str_remove(basename(filename),".json")
  #cat(sprintf("Schema Version is %s\n", parsefile$metadata$schemaVersion))
    if (parsefile$report$workflow$reportType!="DNA"){
      #cat(sprintf("%s report is not DNA, moved\n",title))
      from=paste("~/JSON_parsing/workingFiles/",title,".json",sep='')
      to=paste("~/JSON_parsing/failedFiles/",title,".json",sep='')
      file.rename(from=from,to=to)
    }else{
      #cat(sprintf("%s\n",title))
      
      #Extract report info
      if (exists("report",parsefile)&&length(parsefile$report)!=0){
        reportData=rbindlist(list(parsefile$report,parsefile$report$workflow),fill=TRUE)
        reportData$workflow=NULL
        reportData[is.na(reportData)]=""
        reportData=reportData%>%
          unique()%>%
          summarize_all(funs(trimws(paste(.,collapse=''))))
        #assign(paste("reportData_",title,sep=''),reportData,envir=.GlobalEnv)
      }else{
        print("No report info")
      }
      
      #Extract patient info  
      if(exists("patient",parsefile)&&length(parsefile$patient)!=0){
        patientData=as.data.frame(t(unlist(parsefile$patient)))
        #assign(paste("patientData_",title,sep=''),patientData,envir=.GlobalEnv)
      }else{
        print("No patient info")
      }
      
      #Extract order info
      if(exists("report",parsefile)&&length(parsefile$report)!=0){
        orderData=rbindlist(list(parsefile$order,parsefile$order$test),fill=TRUE)
        orderData$test=NULL
        orderData[is.na(orderData)]=""
        orderData=orderData%>%
          unique()%>%
          summarize_all(funs(trimws(paste(.,collapse=''))))
        #assign(paste("orderData_",title,sep=''),orderData,envir=.GlobalEnv)
      }else{
        print("No order info")
      }
      
      #Extract specimens info
      if(exists("specimens",parsefile)&&length(parsefile$specimens)!=0){
        specimens=list()
        for(i in 1:length(parsefile$specimens)){
          specimens[[i]]=as.data.frame(t(unlist(parsefile$specimens[[i]])))
        }
        specimensData=do.call(rbind.fill,specimens)%>%
          unique()
        #assign(paste("specimensData_",title,sep=''),specimensData,envir=.GlobalEnv)
      }else{
        print("No specimens info")
      }
      
      #Extract   mutations info
      if (exists("results",parsefile)&&length(parsefile$results)!=0){
        #Tumor Mutational Burden
        if (length(parsefile$results$tumorMutationalBurden!=0)){
          tmbdata=as.data.frame(t(unlist(c(parsefile$results[1],parsefile$results[2],parsefile$results[3]))))
          #assign(paste("tmbData_",title,sep=''),tmbdata,envir=.GlobalEnv)
        }
        #Somatic Potentially Actionable Mutations
        if (exists("somaticPotentiallyActionableMutations",parsefile$results)&&length(parsefile$results$somaticPotentiallyActionableMutations)!=0){
          #Remove Therapies
          for (i in 1:length(parsefile$results$somaticPotentiallyActionableMutations)){
            for (j in 1:length(parsefile$results$somaticPotentiallyActionableMutations[[i]]$variants)){
              parsefile$results$somaticPotentiallyActionableMutations[[i]]$variants[[j]]$therapies=NULL
              }
            }
          mutations=list()
          variants=list()
          #Extract Somatic Potentially Actionable Mutations data
            #Per Entry
          for(i in 1:length(parsefile$results$somaticPotentiallyActionableMutations)){
              #Per Variants in Entry
            for(j in 1:length(parsefile$results$somaticPotentiallyActionableMutations[[i]]$variants)){
              variants[[j]]=rbindlist(list(parsefile$results$somaticPotentiallyActionableMutations[[i]]$variants[j],parsefile$results$somaticPotentiallyActionableMutations[[i]]$variants[[j]]),fill=TRUE)
            }
              #Per Variants in Entry
            for (j in 1:length(variants)){
              variantsData=do.call(rbind,variants[j])
              mutationsList=parsefile$results$somaticPotentiallyActionableMutations[[i]]
              mutationsList$variants=NULL
              mutationTable=rbindlist(list(mutationsList,variantsData),fill=TRUE)
              mutations=append(mutations,list(rbindlist(list(mutationsList,variantsData),fill=TRUE)))
            }
          }
          #Build SPAMS table
          SPAMsData=do.call(rbind,mutations)
          SPAMsData$mutationEffect=NULL
          SPAMsData=SPAMsData%>%
            unique()%>%
            fill(c(1:4),.direction=c("down"))%>%
            fill(c(5:ncol(SPAMsData)),.direction=c("up"))%>%
            unique()%>%
            mutate(mutation_type="Somatic Potentially Actionable Mutation")
        }else{
          print("No SPAMS")
        }
        
        #Somatic Potentially Actionable Copy Number Variants
        if (exists("somaticPotentiallyActionableCopyNumberVariants",parsefile$results)&&length(parsefile$results$somaticPotentiallyActionableCopyNumberVariants)!=0){
          #Remove Therapies
          for (i in 1:length(parsefile$results$somaticPotentiallyActionableCopyNumberVariants)){
            parsefile$results$somaticPotentiallyActionableCopyNumberVariants[[i]]$therapies=NULL
          } 
          #Extract Somatic Potentially Actionable Copy Number Variants Data
          variants=list()
          for(i in 1:length(parsefile$results$somaticPotentiallyActionableCopyNumberVariants)){
            variants[[i]]=as.data.frame(t(unlist(parsefile$results$somaticPotentiallyActionableCopyNumberVariants[[i]])))
          }
          SPACNVsData=do.call(rbind,variants)%>%
            mutate(mutation_type="Somatic Potentially Actionable Copy Number Variants")
          rm(variants)
        }else{
          print("No SPACNVs")
        }
        
        #Somatic Biologically Relevant Variants
        if (exists("somaticBiologicallyRelevantVariants",parsefile$results)&&length(parsefile$results$somaticBiologicallyRelevantVariants)!=0){
          #Extract Data
          variants=list()
          for(i in 1:length(parsefile$results$somaticBiologicallyRelevantVariants)){
            variants[[i]]=as.data.frame(t(unlist(parsefile$results$somaticBiologicallyRelevantVariants[[i]])))
          }
          SBRVsData=do.call(rbind,variants)%>%
            mutate(mutation_type="Somatic Biologically Relevant Variants")
          rm(variants)
        }else{
          print("No SBRVs")
        }
        
        #Somatic Variants of Unknown Significance
        if (exists("somaticVariantsOfUnknownSignificance",parsefile$results)&&length(parsefile$results$somaticVariantsOfUnknownSignificance)!=0){
          #Extract Data
          variants=list()
          for(i in 1:length(parsefile$results$somaticVariantsOfUnknownSignificance)){
            variants[[i]]=as.data.frame(t(unlist(parsefile$results$somaticVariantsOfUnknownSignificance[[i]])))
          }
          SVUSsData=do.call(rbind,variants)%>%
            mutate(mutation_type="Somatic Variants of Unknown Significance")
          rm(variants)
        }else{
          print("No SVUSs")
        }
        
        #Fusion Variants
        if (exists("fusionVariants",parsefile$results)&&length(parsefile$results$fusionVariants)!=0){
          #Remove Therapies
          for (i in 1:length(parsefile$results$fusionVariants)){
            parsefile$results$fusionVariants[[i]]$therapies=NULL
          }
          #Extract Data
          variants=list()
          for(i in 1:length(parsefile$results$fusionVariants)){
            variants[[i]]=as.data.frame(t(unlist(parsefile$results$fusionVariants[[i]])))
          }
          FVsData=do.call(rbind,variants)%>%
            mutate(mutation_type="Fusion Variants")%>%
            dplyr::rename(gene5display=gene5Display)%>%
            dplyr::rename(gene3display=gene3Display)
          rm(variants)
        }else{
          #print("No FVs")
        }
        
        #Inherited Relevant Variants
        if (exists("inheritedRelevantVariants",parsefile$results)&&length(parsefile$results$inheritedRelevantVariants)!=0){
          #Extract Data
          if (length(parsefile$results$inheritiedIncidentalFindings)!=0){
          variants=list()
          for(i in 1:length(parsefile$results$inheritedRelevantVariants)){
            variants[[i]]=as.data.frame(t(unlist(parsefile$results$inheritedRelevantVariants[[i]])))
          }
          IRVsData=do.call(rbind,variants)%>%
            mutate(mutation_type="Inherited Relevant Variants")
          rm(variants)
          }else{
            print("No IRVs")
          }
        }else{
          print("No IRVs")
        }
        
        #Inherited Incidental Findings
        if (exists("inheritedIncidentalFindings",parsefile$results)&&length(parsefile$results$inheritedIncidentalFindings)!=0){
          #Extract Data
          if (length(parsefile$results$inheritiedIncidentalFindings)!=0){
          variants=list()
          for(i in 1:length(parsefile$results$inheritedIncidentalFindings)){
            variants[[i]]=as.data.frame(t(unlist(parsefile$results$inheritedIncidentalFindings[[i]])))
          }
          IIFsData=do.call(rbind,variants)%>%
            mutate(mutation_type="Inherited Incidental Findings")
          rm(variants)
          }else{
            print("No IIFs")
          }
        }else{
          print("No IIFs")
        }
        
        #Inherited Variants of Unknown Significance
        if (exists("inheritedVariantsOfUnknownSignificance",parsefile$results)&&length(parsefile$results$inheritedVariantsOfUnknownSignificance)!=0){
          #Extract Data
          if (length(parsefile$results$inheritiedIncidentalFindings)!=0){
          variants=list()
          for(i in 1:length(parsefile$results$inheritedVariantsOfUnknownSignificance)){
            variants[[i]]=as.data.frame(t(unlist(parsefile$results$inheritedVariantsOfUnknownSignificance[[i]])))
          }
          IVUSsData=do.call(rbind,variants)%>%
            mutate(mutation_type="Inherited Variants of Unknown Significance")
          rm(variants)
          }else{
            print("No IVUSs")
          }
        }else{
          print("No IVUSs")
        }
        
        #Merge and Output data tables
        
        if (exists(c("SPAMsData","SBRVsData","SVUSsData","FVsData","IRVsData","IIFsData","IVUSsData"))){
        mergedMutations=rbind.fill(get0("SPAMsData"),get0("SPACNVsData"),get0("SBRVsData"),get0("SVUSsData"),get0("FVsData"),get0("IRVsData"),get0("IIFsData"),get0("IVUSsData"))%>%
          select(mutation_type,everything())
        
        outMutations=merge(patientData,mergedMutations)
        write.csv(outMutations,"~/JSON/mutations.csv",append=TRUE)
        #assign(paste("mergedMutations_",title,sep=''),mergedMutations,envir=.GlobalEnv)
        #assign(paste("patientMutations_",title,sep=''),merge(patientData,mergedMutations),envir=.GlobalEnv)
        }else{
          print("No mutations info")
        }
        
      }else{
        print("No mutations info")
      } 
      
      if (exists(c("orderData","reportData","specimensData","tmbData"))){        outPatients=rbind.fill(get0("patientData"),get0("orderData"),get0("reportData"),get0("specimensData"),get0("tmbData"))        
        write.csv(outPatients,"~/JSON/patients.csv",append=TRUE)        
      }else{
        print("Missing patient info")
      }        
    }  
}



【问题讨论】:

  • 这段代码太庞大了,任何人都无法编写一个合理的答案。在你的位置上,我会尝试调试代码。仔细阅读哈德利的书中Advanced R 中的rstudios guide to debuggingchapter 22。使用这些知识来找到您的问题并将其隔离,可能通过跟踪整个函数的值。一旦你发现问题,如果你不能解决它。制作一个较小的示例来复制问题,然后发布一个新问题。
  • 调试对于任何想要编程的人、数据科学家、分析师等来说都是一项基本技能。这是在实践中解决绝大多数编程问题的方法。
  • 附加说明:即使使用缩减的功能,这也是不可重现的,因为我们不知道发生此故障时它正在操作的文件。您对 "it prints #expected output" 的总结并不能帮助我们缩小问题的范围,因为该字符串不会出现在您的函数中。我理解(并且普遍赞赏)减少您的问题大小的尝试,但是(1)该功能已经通过该建议; (2) 我建议您提供明确的(不一定是全部)上下文。祝你好运!

标签: r json jsonlite


【解决方案1】:

解决了这个问题,有几个文件返回 NULL,代码不知道如何处理并隐藏在我必须解析的数千个文件中。此代码用于处理健康记录,很抱歉我无法发布太多细节!感谢您的帮助!

【讨论】:

    猜你喜欢
    • 1970-01-01
    • 2011-08-16
    • 2011-01-06
    • 2016-09-13
    • 2020-05-07
    • 2018-03-15
    • 2015-07-15
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多