【问题标题】:R - How to replace NA with condition(R - 如何按条件替换 NA)
【发布时间】:2019-11-22 06:28:58
【问题描述】:

我有一个像下面这样的数据框。它有 college、sub_college、dept 和 abbr 这几列,但其中缺少一些值。我想用具有相同 abbr 的行中的值来替换 NA

例如,我希望所有带有abbr = ATM S 的行都有:

college name = college of the enviroment
sub_college = collge on the enviorment
dept = atmospheric sciences

输入

    structure(list(year = c(2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L), term = c("Autumn", 
"Autumn", "Autumn", "Autumn", "Autumn", "Autumn", "Autumn", "Autumn", 
"Autumn", "Autumn", "Autumn", "Autumn", "Autumn", "Autumn", "Autumn", 
"Autumn", "Autumn", "Autumn", "Autumn", "Autumn", "Autumn", "Autumn", 
"Autumn", "Autumn", "Autumn", "Autumn", "Autumn", "Autumn", "Autumn", 
"Autumn", "Autumn", "Autumn", "Autumn", "Autumn", "Autumn", "Autumn", 
"Autumn", "Autumn", "Autumn", "Autumn", "Autumn", "Autumn", "Autumn", 
"Autumn", "Autumn", "Autumn", "Autumn", "Autumn", "Autumn", "Autumn"
), college_text = c(NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_), sub_college_text = c(NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_), 
    department_text = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), dept_abbrev = c("A A", 
    "A A", "A A", "A A", "A A", "A A", "A A", "A A", "A A", "A A", 
    "A A", "A A", "A A", "A A", "A S", "A S", "A S", "A S", "A S", 
    "A S", "A S", "AAS", "AAS", "AAS", "AAS", "AAS", "AAS", "ACADEM", 
    "ACCTG", "ACCTG", "ACCTG", "ACCTG", "ACCTG", "ACCTG", "ACCTG", 
    "ACCTG", "ACCTG", "ACCTG", "ACCTG", "ACCTG", "ACCTG", "ACCTG", 
    "ACCTG", "ACCTG", "ACCTG", "ACCTG", "ACCTG", "ACCTG", "ACCTG", 
    "ACCTG"), course_no = c(198L, 210L, 310L, 311L, 320L, 402L, 
    405L, 430L, 447L, 461L, 501L, 503L, 516L, 530L, 101L, 101L, 
    101L, 211L, 331L, 331L, 431L, 101L, 330L, 360L, 370L, 385L, 
    395L, 197L, 215L, 215L, 215L, 225L, 301L, 301L, 301L, 302L, 
    303L, 303L, 311L, 320L, 320L, 321L, 321L, 411L, 440L, 485L, 
    500L, 500L, 506L, 521L), section_id = c("A  ", "A  ", "A  ", 
    "A  ", "A  ", "A  ", "A  ", "A  ", "A  ", "A  ", "A  ", "A  ", 
    "A  ", "B  ", "A  ", "B  ", "C  ", "A  ", "A  ", "B  ", "A  ", 
    "A  ", "A  ", "A  ", "A  ", "A  ", "A  ", "A  ", "A  ", "B  ", 
    "C  ", "A  ", "A  ", "B  ", "C  ", "A  ", "A  ", "B  ", "A  ", 
    "A  ", "B  ", "A  ", "B  ", "A  ", "A  ", "A  ", "C  ", "D  ", 
    "A  ", "A  "), course_code = c("A A 198 A", "A A 210 A", 
    "A A 310 A", "A A 311 A", "A A 320 A", "A A 402 A", "A A 405 A", 
    "A A 430 A", "A A 447 A", "A A 461 A", "A A 501 A", "A A 503 A", 
    "A A 516 A", "A A 530 B", "A S 101 A", "A S 101 B", "A S 101 C", 
    "A S 211 A", "A S 331 A", "A S 331 B", "A S 431 A", "AAS 101 A", 
    "AAS 330 A", "AAS 360 A", "AAS 370 A", "AAS 385 A", "AAS 395 A", 
    "ACADEM 197 A", "ACCTG 215 A", "ACCTG 215 B", "ACCTG 215 C", 
    "ACCTG 225 A", "ACCTG 301 A", "ACCTG 301 B", "ACCTG 301 C", 
    "ACCTG 302 A", "ACCTG 303 A", "ACCTG 303 B", "ACCTG 311 A", 
    "ACCTG 320 A", "ACCTG 320 B", "ACCTG 321 A", "ACCTG 321 B", 
    "ACCTG 411 A", "ACCTG 440 A", "ACCTG 485 A", "ACCTG 500 C", 
    "ACCTG 500 D", "ACCTG 506 A", "ACCTG 521 A"), course_title = c("SPECIAL TOPICS", 
    "ENGR STATICS", "ORB SPACE FLIGHT", "ATM FLIGHT MECH", "AEROSPACE INSTR", 
    "FLUID MECHANICS", "AEROSPACE PLASMAS", "FINTE ELEMENT ANALYS", 
    "CONT IN AEROSP", "ADVANCED PROPULSION", "PHYS GASDYNMCS I", 
    "CONTINUUM MECH", "STAB & CON FLT VEH", "MECHANICS OF SOLIDS", 
    "FOUNDATION USAF I", "FOUNDATION USAF I", "FOUNDATION USAF I", 
    "AIR & SPACE PWR I", "AEROSPACE STDYS 300", "AEROSPACE STDYS 300", 
    "AEROSPACE STDYS 400", "ASIAN-AM CULTURE", "ASIAN AMER THEATER", 
    "FIL-AM HIST & CLTR", "JPN-AM HIST CLTR", "AAS LAW AND IMMIGRA", 
    "SE-ASIAN AM HST/CLT", "RESEARCH SEMINARS", "ACCTG & FIN REPRTNG", 
    "ACCTG & FIN REPRTNG", "ACCTG & FIN REPRTNG", "MANAGERIAL ACCTG", 
    "INTERMED ACCTG I", "INTERMED ACCTG I", "INTERMED ACCTG I", 
    "INTERMED ACCTG II", "INTERMED ACCTG III", "INTERMED ACCTG III", 
    "COST ACCOUNTING", "ACCTG INFO SYSTEMS", "ACCTG INFO SYSTEMS", 
    "TAX EFF OF BUS DECS", "TAX EFF OF BUS DECS", "AUDIT STANDRDS PRIN", 
    "ACC & FIN MGT DEC", "ADV FIN ACCTG", "FINANCIAL ACCTG", 
    "FINANCIAL ACCTG", "INTEN ACCT PRIN 1", "CASES INFO QUALITY"
    ), credits = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_), student_count = c("25", "186", 
    "57", "57", "57", "19", "28", "24", "47", "29", "24", "17", 
    "15", "21", "21", "15", "20", "18", "22", "18", "25", "97", 
    "40", "44", "35", "40", "27", "36", "195", "194", "179", 
    "178", "57", "59", "53", "45", "44", "54", "54", "50", "15", 
    "56", "34", "36", "28", "39", "56", "55", "59", "47"), A = c("22", 
    "21", "7", "8", "3", "1", "4", "3", "10", "2", "10", "1", 
    "6", "NULL", "6", "7", "15", "13", "11", "12", "4", "NULL", 
    "6", "10", "4", "3", "4", "7", "25", "33", "14", "5", "3", 
    "NULL", "4", "4", "2", "2", "2", "8", "1", "4", "4", "2", 
    "3", "4", "4", "3", "2", "17"), Aminus = c("3", "30", "13", 
    "15", "19", "7", "4", "8", "14", "10", "12", "4", "3", "5", 
    "6", "3", "2", "1", "6", "4", "5", "14", "23", "13", "9", 
    "19", "9", "18", "36", "33", "42", "51", "11", "15", "13", 
    "13", "14", "15", "11", "19", "4", "27", "11", "15", "8", 
    "10", "28", "33", "26", "30"), Bplus = c("NULL", "40", "15", 
    "11", "19", "6", "5", "7", "11", "8", "1", "7", "2", "4", 
    "2", "NULL", "NULL", "2", "4", "1", "7", "30", "7", "6", 
    "3", "12", "3", "9", "21", "24", "25", "15", "12", "22", 
    "12", "5", "13", "7", "35", "9", "6", "13", "7", "10", "10", 
    "7", "17", "16", "15", "NULL"), B = c("NULL", "30", "10", 
    "14", "13", "3", "8", "4", "6", "5", "NULL", "4", "4", "2", 
    "3", "1", "2", "NULL", "NULL", "NULL", "3", "28", "2", "4", 
    "6", "6", "5", "2", "27", "30", "35", "29", "11", "10", "15", 
    "10", "6", "10", "4", "6", "2", "4", "9", "7", "4", "3", 
    "7", "3", "12", "NULL"), Bminus = c("NULL", "31", "8", "6", 
    "3", "1", "1", "2", "4", "2", "NULL", "1", "NULL", "7", "1", 
    "NULL", "NULL", "NULL", "1", "NULL", "4", "16", "1", "5", 
    "7", "NULL", "2", "NULL", "22", "23", "22", "45", "11", "6", 
    "4", "10", "5", "10", "1", "4", "2", "4", "1", "2", "3", 
    "7", "NULL", "NULL", "2", "NULL"), Cplus = c("NULL", "14", 
    "2", "2", "NULL", "1", "3", "NULL", "1", "2", "NULL", "NULL", 
    "NULL", "1", "NULL", "1", "NULL", "NULL", "NULL", "NULL", 
    "1", "4", "NULL", "1", "4", "NULL", "NULL", "NULL", "9", 
    "12", "15", "7", "6", "3", "2", "NULL", "2", "5", "NULL", 
    "2", "NULL", "3", "NULL", "NULL", "NULL", "3", "NULL", "NULL", 
    "NULL", "NULL"), C = c("NULL", "9", "2", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "1", "NULL", "NULL", "NULL", "NULL", 
    "1", "NULL", "1", "NULL", "NULL", "NULL", "1", "NULL", "1", 
    "NULL", "1", "2", "NULL", "2", "NULL", "12", "11", "7", "12", 
    "NULL", "1", "2", "2", "1", "4", "NULL", "2", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "3", "NULL", "NULL", "NULL", "NULL"
    ), Cminus = c("NULL", "4", "NULL", "1", "NULL", "NULL", "2", 
    "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "2", "NULL", "NULL", "NULL", "NULL", "23", "12", "6", "2", 
    "NULL", "2", "1", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL"), Dplus = c("NULL", "1", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "1", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "2", "1", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL", "NULL"), D = c("NULL", "1", 
    "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "11", "7", "NULL", "2", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL"), 
    Dminus = c("NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "1", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "3", 
    "NULL", "1", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL"), Fail = c("NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL", "2", "1", "NULL", "1", "NULL", 
    "NULL", "NULL", "2", "1", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "3", "1", "1", "2", "NULL", "NULL", "NULL", "NULL", 
    "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "1", "NULL", 
    "NULL", "1", "NULL", "NULL", "NULL", "NULL"), W = c("NULL", 
    "5", "NULL", "NULL", "NULL", "NULL", "1", "NULL", "NULL", 
    "NULL", "1", "NULL", "NULL", "1", "NULL", "NULL", "1", "1", 
    "NULL", "NULL", "1", "2", "NULL", "2", "NULL", "NULL", "2", 
    "NULL", "6", "5", "12", "5", "2", "NULL", "NULL", "1", "1", 
    "1", "1", "NULL", "NULL", "1", "1", "NULL", "NULL", "1", 
    "NULL", "NULL", "2", "NULL"), avg_gpa = c(3.9, 3, 3.2, 3.2, 
    3.3, 3.3, 3.1, 3.4, 3.4, 3.3, 3.7, 3.3, 3.5, 3, 3.1, 3.1, 
    3.8, 3.6, 3.7, 3.7, 3.2, 3, 3.4, 3.3, 3, 3.4, 3.3, 3.6, 2.8, 
    2.9, 3, 2.9, 3, 3.1, 3.1, 3.1, 3.2, 3, 3.3, 3.3, 3.3, 3.4, 
    3.2, 3.3, 3.3, 3, 3.4, 3.4, 3.3, 3.7), professor_rating = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_), no_responses = c(NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), lastname = c(NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_
    ), firstname = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_)), row.names = c(NA, 
50L), class = "data.frame")

我不知道如何做到这一点。 感谢您的帮助!

【问题讨论】:

  • 请提供生成数据框示例的代码以方便社区帮助您,您可以使用dput
  • @DavidS 感谢您的回复!我刚刚在我的问题中包含了 dput。
  • 在您分享的 dput 数据中,要替换的是哪些列?要用哪些值来替换? abbr 对应的是哪一列?

标签: r dataframe dplyr tidyr


【解决方案1】:

您没有提供可用的示例,因此下面的代码是在没有经过任何检查的情况下编写的。我原本考虑使用 dplyr 中的 mutate(),但在这个简单的案例中,基础 R(base R)应该就可以解决问题。不过,如果我错了,请纠正我。

# Fill in the college / sub-college / department names for every row whose
# department abbreviation is "ATM S".
# NOTE: the column names follow the question's wording (college, sub_college,
# dept, abbr); the posted dput sample names them college_text,
# sub_college_text, department_text and dept_abbrev -- adjust as needed.
# `%in%` never yields NA, so rows with a missing abbr are simply left alone.
is_atm_s <- df$abbr %in% "ATM S"
df$college[is_atm_s] <- "college of the enviroment"
df$sub_college[is_atm_s] <- "collge on the enviorment"
df$dept[is_atm_s] <- "atmospheric sciences"

【讨论】:

    【解决方案2】:

    假设每个组的每一列总是有一个唯一值,我们可以使用dplyr,如下所示。

    # Fill the missing entries of `x` with a value already present in `x`.
    #
    # x: a vector (one column within one group) that may contain NA.
    # Returns a vector of the same length as `x`:
    #   * if `x` has no non-missing value (all NA, or empty) it is returned
    #     unchanged, so groups with nothing to copy from stay as they are;
    #   * otherwise every NA is replaced by the first non-missing value seen
    #     (when a group holds several distinct values, the first one wins).
    replace_fun <- function(x) {
      known <- unique(x[!is.na(x)])
      if (length(known) == 0L) {
        return(x)
      }
      x[is.na(x)] <- known[1L]
      x
    }
    
    # Apply replace_fun to the three descriptive columns within each abbr
    # group, then drop the grouping so that later verbs operate on the whole
    # table again instead of silently working per group.
    df %>%
      group_by(abbr) %>%
      mutate_at(vars(college, sub_college, dept), replace_fun) %>%
      ungroup()
    

    【讨论】:

    • 实际上,并不是每个组的每一列都恰好有一个唯一值。有些组可能全部是 NA,在这种情况下,我希望保持原样。
    猜你喜欢
    • 1970-01-01
    • 2021-10-07
    • 2016-08-26
    • 2022-01-23
    • 2023-02-13
    • 1970-01-01
    • 2014-04-10
    • 2018-12-11
    • 1970-01-01
    相关资源
    最近更新 更多