【问题标题】:lapply is converting strings to factors(lapply 正在将字符串转换为因子)
【发布时间】:2016-01-20 12:56:00
【问题描述】:

我有以下列表元素(一个由字符串和数字单元格组成的数据框),对它执行下面的代码:

as.data.frame(lapply(t1[3], function(y) gsub("\\s+", " ", y)),stringsAsFactors = FALSE)

结果却得到一个全是数字的数据框:

1                 c(84, 85, 1, 2, 3, 4, 5, 85, 6, 7, 8, 9, 85, 10, 11, 12, 13, 85, 14, 15, 16, 17, 85, 18, 85, 19, 20, 21, 22, 23, 24, 85, 25, 26, 27, 28, 29, 30, 31, 32, 33, 85, 34, 35, 85, 36, 37, 38, 39, 40, 85, 41, 42, 43, 44, 45, 85, 46, 47, 85, 48, 49, 85, 50, 51, 85, 52, 53, 54, 85, 55, 56, 85, 57, 58, 59, 85, 60, 61, 62, 85, 63, 85, 64, 85, 65, 85, 66, 85, 67, 85, 68, 85, 69, 85, 70, 85, 71, 85, 72, 85, 73, 85, 74, 75, 76, 85, 77, 85, 78, 79, 80, 81, 82, 83)
2 c(66, 114, 64, 21, 2, 102, 115, 52, 46, 47, 22, 53, 38, 80, 20, 39, 40, 62, 105, 28, 106, 63, 6, 7, 103, 67, 108, 71, 113, 112, 104, 10, 33, 15, 92, 97, 107, 55, 56, 41, 11, 36, 60, 37, 90, 70, 54, 8, 61, 91, 85, 3, 87, 75, 57, 86, 29, 69, 30, 59, 77, 74, 65, 99, 58, 93, 9, 19, 94, 34, 25, 35, 12, 68, 111, 13, 72, 98, 14, 73, 42, 43, 109, 110, 23, 24, 88, 89, 78, 79, 48, 49, 50, 51, 81, 82, 95, 96, 44, 45, 31, 32, 83, 16, 1, 84, 4, 5, 100, 17, 18, 101, 76, 27, 26)
3                                                                                                                           c(2, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1)
4                                                                                                                           c(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)

我只是想把数据框中连续的多个空格替换成一个空格,为什么返回的却是数字呢?我期望得到的是字符串。

t1[3]的dput输出:

dput(t1[3])
structure(list(`NULL` = structure(list(V1 = structure(c(84L, 
85L, 1L, 2L, 3L, 4L, 5L, 85L, 6L, 7L, 8L, 9L, 85L, 10L, 11L, 
12L, 13L, 85L, 14L, 15L, 16L, 17L, 85L, 18L, 85L, 19L, 20L, 21L, 
22L, 23L, 24L, 85L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 
85L, 34L, 35L, 85L, 36L, 37L, 38L, 39L, 40L, 85L, 41L, 42L, 43L, 
44L, 45L, 85L, 46L, 47L, 85L, 48L, 49L, 85L, 50L, 51L, 85L, 52L, 
53L, 54L, 85L, 55L, 56L, 85L, 57L, 58L, 59L, 85L, 60L, 61L, 62L, 
85L, 63L, 85L, 64L, 85L, 65L, 85L, 66L, 85L, 67L, 85L, 68L, 85L, 
69L, 85L, 70L, 85L, 71L, 85L, 72L, 85L, 73L, 85L, 74L, 75L, 76L, 
85L, 77L, 85L, 78L, 79L, 80L, 81L, 82L, 83L), .Label = c("01A", 
"01B", "01C", "01D", "01Z", "02A", "02B", "02C", "02Z", "03A", 
"03B", "03C", "03Z", "04A", "04B", "04C", "04Z", "05Z", "06A", 
"06B", "06C", "06D", "06E", "06Z", "07A", "07B", "07C", "07D", 
"07E", "07F", "07G", "07H", "07Z", "08A", "08Z", "09A", "09B", 
"09C", "09D", "09Z", "10A", "10B", "10C", "10D", "10Z", "11A", 
"11Z", "12A", "12B", "13A", "13Z", "14A", "14B", "14Z", "15A", 
"15Z", "16A", "16B", "16Z", "17A", "17B", "17Z", "18Z", "19Z", 
"20Z", "21Z", "22Z", "23Z", "24Z", "25Z", "26Z", "27Z", "28Z", 
"29A", "29B", "29Z", "30Z", "31A", "31B", "31Z", "97Z", "98Z", 
"99Z", "Value", " "), class = "factor"), V2 = structure(c(66L, 
114L, 64L, 21L, 2L, 102L, 115L, 52L, 46L, 47L, 22L, 53L, 38L, 
80L, 20L, 39L, 40L, 62L, 105L, 28L, 106L, 63L, 6L, 7L, 103L, 
67L, 108L, 71L, 113L, 112L, 104L, 10L, 33L, 15L, 92L, 97L, 107L, 
55L, 56L, 41L, 11L, 36L, 60L, 37L, 90L, 70L, 54L, 8L, 61L, 91L, 
85L, 3L, 87L, 75L, 57L, 86L, 29L, 69L, 30L, 59L, 77L, 74L, 65L, 
99L, 58L, 93L, 9L, 19L, 94L, 34L, 25L, 35L, 12L, 68L, 111L, 13L, 
72L, 98L, 14L, 73L, 42L, 43L, 109L, 110L, 23L, 24L, 88L, 89L, 
78L, 79L, 48L, 49L, 50L, 51L, 81L, 82L, 95L, 96L, 44L, 45L, 31L, 
32L, 83L, 16L, 1L, 84L, 4L, 5L, 100L, 17L, 18L, 101L, 76L, 27L, 
26L), .Label = c("Abdominal \r\n    Pain", "Abrasion", "Alcohol", 
"Allergy (including \r\n      Anaphylaxis)", "Allergy (including \r\n      Anaphylaxis), other or unspecified", 
"Amputation", "Amputation, other or \r\n      unspecified", "Animal Bite", 
"Asthma", "Burns, Scalds and \r\n      Thermal Conditions", "Burns, Scalds and Thermal \r\n      Conditions, other or unspecified", 
"Cardiovascular \r\n      Conditions", "Cardiovascular Conditions, \r\n      other or unspecified", 
"Cerebrovascular \r\n      Event", "Chemical", "Chest Pain, non \r\n      cardiac", 
"Chronic Alcohol \r\n      Abuse", "Chronic Drug \r\n      Abuse", 
"Chronic Obstructive \r\n      Pulmonary disease", "Closed \r\n    Fracture", 
"Contusion", "Dental \r\nInjury", "Dermatological \r\n      Conditions", 
"Dermatological Conditions, \r\n      other or unspecified", 
"Diabetes", "Diagnosis Not \r\n      Recorded", "Diagnosis Type Not \r\n      Otherwise Specified", 
"Dislocation", "Drowning", "Drowning, other or \r\n      unspecified", 
"Ear, Nose and Throat \r\n      Conditions", "Ear, Nose and Throat \r\n      Conditions, other or unspecified", 
"Electric", "Endocrinological \r\n      Conditions", "Endocrinological \r\n      Conditions, other or unspecified", 
"Foreign \r\n      Body", "Foreign Body, other or \r\n      unspecified", 
"Fracture", "Fracture \r\n      Dislocation", "Fracture, other or \r\n      unspecified", 
"Frostbite", "Gastrointestinal \r\n      Conditions", "Gastrointestinal \r\n      Conditions, other or unspecified", 
"Genito-Urinary \r\n      Medicine", "Genito-urinary Medicine, \r\n      other or unspecified", 
"Glasgow Coma Score \r\n      15", "Glasgow Coma Score \r\n      <15", 
"Gynaecological \r\n      Conditions", "Gynaecological Conditions, \r\n      other or unspecified", 
"Haematological \r\n      Conditions", "Haematological Conditions, \r\n      other or unspecified", 
"Head \r\n      Injury", "Head Injury, other or \r\n      unspecified", 
"Human Bite", "Hyperthermia", "Hypothermia", "Illicit \r\n      Drug", 
"Infection, other or \r\n      unspecified", "Infectious \r\n      Disease", 
"Ingested Foreign \r\n      Body", "Insect Bite or \r\n      Sting", 
"Joint \r\n      Injury", "Joint Injury, other or \r\n      unspecified", 
"Laceration", "Local \r\n      Infection", "Meaning", "Muscle \r\nInjury", 
"Myocardial \r\n      Infarction", "Near \r\nDrowning", "Needle Stick \r\n      Injury", 
"Nerve Injury", "Neurological Conditions", "Neurological Conditions, \r\n      other or unspecified", 
"Non-notifiable \r\n      Disease", "Non-prescribed/purchased \r\n      drug", 
"Nothing Abnormal \r\n      Detected", "Notifiable \r\n      Disease", 
"Obstetric \r\n      Conditions", "Obstetric Conditions, \r\n      other or unspecified", 
"Open \r\nFracture", "Ophthalmic \r\n      Conditions", "Ophthalmic Conditions, \r\n      other or unspecified", 
"Pain", "Pain, other or \r\n      unspecified", "Poisoning or \r\n      Overdose", 
"Poisoning or Overdose, \r\n      other or unspecified", "Prescribed \r\n      Drug", 
"Psychological/Psychiatric \r\n    Conditions", "Psychological/Psychiatric \r\n      Conditions, other or unspecified", 
"Puncture \r\n      Wounds", "Puncture Wounds, other or \r\n      unspecified", 
"Radiation", "Respiratory \r\n      Conditions", "Respiratory Conditions, \r\n      other or unspecified", 
"Rheumatological \r\n      Conditions", "Rheumatological \r\n      Conditions, other or unspecified", 
"Scald", "Seizure/Convulsion", "Septicaemia", "Social \r\n      Problems/Homelessness", 
"Social \r\n      Problems/Homelessness, other or unspecified", 
"Soft tissue \r\n      inflammation", "Soft Tissue \r\n      Injury", 
"Soft Tissue Injury, other \r\n      or unspecified", "Sprain", 
"Subluxation", "Sunburn", "Tendon \r\nInjury", "Urological Conditions", 
"Urological Conditions, \r\n      other or unspecified", "Vascular Condition", 
"Vascular \r\n    Injury", "Visceral \r\n    Injury", "Wound", 
"Wound, other or \r\n      unspecified"), class = "factor"), 
    V3 = structure(c(2L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 
    1L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 
    1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 
    1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 
    1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 
    1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 
    3L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L
    ), .Label = c("1st July 2010", "Valid From", " "), class = "factor"), 
    V4 = structure(c(1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
    ), .Label = c("Valid To", " "), class = "factor")), .Names = c("V1", 
"V2", "V3", "V4"), row.names = c(NA, -115L), class = "data.frame")), .Names = "NULL")

【问题讨论】:

    标签: r lapply


    【解决方案1】:

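    t1[3] 仍然是一个 list,只不过它只包含一个元素(这个元素才是 data.frame)。因此 lapply(t1[3], ...) 会把整个 data.frame 作为 y 传给 gsub,gsub 先用 as.character 对其进行强制转换,于是每一列因子都变成了由其整数编码组成的文本(例如 "c(84, 85, ...)")。我们可以先用 [[ 取出该元素,再遍历 data.frame 的各列并应用 gsub。这样得到的输出是一个 list,可以再用 data.frame 包装。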

    lapply(t1[3][[1]], function(y) gsub("\\s+", " ", y))
    

    【讨论】:

    • 这里可能不需要定义匿名函数。
    • 谢谢 akrun,这个方法有效。我原先以为 t1[3] 取出的就是列表中的那个元素本身。
    • @DavidArenburg 是的,不需要,但我只是想使用 OP 的代码来显示差异。
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 2013-07-15
    • 2011-02-20
    • 1970-01-01
    • 1970-01-01
    • 2018-07-25
    • 2016-04-13
    相关资源
    最近更新 更多