【问题标题】:Setting column names when using bind_cols (r, dplyr) / 使用 bind_cols (r, dplyr) 时设置列名
【发布时间】:2017-06-28 21:42:33
【问题描述】:

我有一个 data.frame (df),其中包含另一个名为 url_variables 的 data.frame。

# Pull the nested url_variables data.frame out of df.
url_variables <- df$url_variables

url_variables 包含许多其他 data.frame,例如 source、campaign、page 等等。这些数据帧中的每一个都有 3 列:key、value、type。我正在使用 bind_cols 将所有单独的数据帧组合成一个平面数据帧。

# Bind the nested data.frames side by side; their column names are kept
# as-is, so nothing records which nested data.frame each column came from.
flat_url_variables <- bind_cols(as.list(url_variables))

问题是 flat_url_variables 的列名是非描述性的 key、value、type、key、value、type 等。如何根据每个单独的 url 变量 data.frame 的名称来命名 flat_url_variables 的列(source_key、source_value、source_type、campaign_key 等)?

编辑:这是 url_variables 的小样本图片:

这是dput(head(url_variables))的输出:

structure(list(`_privatedomain` = structure(list(key = c("_privatedomain", 
"_privatedomain", "_privatedomain", "_privatedomain", "_privatedomain", 
"_privatedomain"), value = c("t", "t", "t", "t", "t", "t"), type = c("url", 
"url", "url", "url", "url", "url")), .Names = c("key", "value", 
"type"), row.names = c(NA, 6L), class = "data.frame"), p = structure(list(
    key = c("p", NA, NA, "p", "p", "p"), value = c("2", NA, NA, 
    "2", "2", "2"), type = c("url", NA, NA, "url", "url", "url"
    )), .Names = c("key", "value", "type"), row.names = c(NA, 
6L), class = "data.frame"), s = structure(list(key = c("s", NA, 
NA, "s", "s", "s"), value = c("incomplete", NA, NA, "incomplete", 
"incomplete", "incomplete"), type = c("url", NA, NA, "url", "url", 
"url")), .Names = c("key", "value", "type"), row.names = c(NA, 
6L), class = "data.frame"), first_name = structure(list(key = c("first_name", 
NA, NA, "first_name", "first_name", "first_name"), value = c("Allan", 
NA, NA, "james", "Sheryl", "Yara"), type = c("url", NA, NA, 
"url", "url", "url")), .Names = c("key", "value", "type"), row.names = c(NA, 
6L), class = "data.frame"), last_name = structure(list(key = c("last_name", 
NA, NA, "last_name", "last_name", "last_name"), value = c("Smith", 
NA, NA, "jones", "Smith", "Keating"), type = c("url", NA, 
NA, "url", "url", "url")), .Names = c("key", "value", "type"), row.names = c(NA, 
6L), class = "data.frame"), email = structure(list(key = c("email", 
NA, NA, "email", "email", "email"), value = c("Allan@email.com", 
NA, NA, "james@email.com", "sheryl@email", "Yara@email.com"
), type = c("url", NA, NA, "url", "url", "url")), .Names = c("key", 
"value", "type"), row.names = c(NA, 6L), class = "data.frame"), 
    phone_number = structure(list(key = c("phone_number", NA, 
    NA, "phone_number", "phone_number", "phone_number"), value = c("0401234567", 
    NA, NA, "0401234567", "0401234567", "0401234567"), type = c("url", 
    NA, NA, "url", "url", "url")), .Names = c("key", "value", 
    "type"), row.names = c(NA, 6L), class = "data.frame"), from = structure(list(
        key = c("from", NA, NA, "from", "from", "from"), value = c("landing_page", 
        NA, NA, "landing_page", "landing_page", "landing_page"
        ), type = c("url", NA, NA, "url", "url", "url")), .Names = c("key", 
    "value", "type"), row.names = c(NA, 6L), class = "data.frame"), 
    snc = structure(list(key = c(NA, NA, "snc", NA, NA, NA), 
        value = c(NA, NA, "1495606827_5925262b571d70.64387871", 
        NA, NA, NA), type = c(NA, NA, "url", NA, NA, NA)), .Names = c("key", 
    "value", "type"), row.names = c(NA, 6L), class = "data.frame"), 
    `__sgtarget` = structure(list(key = c(NA, NA, "__sgtarget", 
    NA, NA, NA), value = c(NA, NA, "10", NA, NA, NA), type = c(NA, 
    NA, "url", NA, NA, NA)), .Names = c("key", "value", "type"
    ), row.names = c(NA, 6L), class = "data.frame"), customertime = structure(list(
        key = c(NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_), value = c(NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_), type = c(NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_
        )), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame"), sotime = structure(list(key = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), value = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), type = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_)), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame"), cancelreschedulelink = structure(list(
        key = c(NA_character_, NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_), value = c(NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_, 
        NA_character_), type = c(NA_character_, NA_character_, 
        NA_character_, NA_character_, NA_character_, NA_character_
        )), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame"), params = structure(list(key = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), value = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), type = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_)), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame"), icslink = structure(list(key = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), value = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), type = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_)), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame"), type = structure(list(key = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), value = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), type = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_)), .Names = c("key", "value", "type"), row.names = c(NA, 
    6L), class = "data.frame")), .Names = c("_privatedomain", 
"p", "s", "first_name", "last_name", "email", "phone_number", 
"from", "snc", "__sgtarget", "customertime", "sotime", "cancelreschedulelink", 
"params", "icslink", "type"), row.names = c(NA, 6L), class = "data.frame")

【问题讨论】:

  • 发布您的数据样本?
  • 最好的方法是什么?
  • For 循环 +cbind 可能有帮助
  • dput(head( url_variables))
  • 添加样本数据

标签: r dplyr


【解决方案1】:

最直接的方法可能是自己设置名称。这里我用 lapply 遍历 url_variables 的每个列名,把该名称作为前缀加到对应子 data.frame 的各列名上,然后用 bind_cols 绑定结果:

# For each nested data.frame in url_variables, prefix its column names with
# the name of the column that holds it (e.g. "p" -> p_key, p_value, p_type),
# then bind all of the renamed data.frames side by side.
flat <- bind_cols(
  lapply(names(url_variables), function(var_name) {
    nested <- url_variables[[var_name]]
    setNames(nested, paste0(var_name, "_", names(nested)))
  })
)

我们可以看到flat[ , 1:6]给出的部分结果:

  _privatedomain_key _privatedomain_value _privatedomain_type p_key p_value p_type
1     _privatedomain                    t                 url     p       2    url
2     _privatedomain                    t                 url  <NA>    <NA>   <NA>
3     _privatedomain                    t                 url  <NA>    <NA>   <NA>
4     _privatedomain                    t                 url     p       2    url
5     _privatedomain                    t                 url     p       2    url
6     _privatedomain                    t                 url     p       2    url

【讨论】:

  • 谢谢 Mark,这个方法效果很好。我之前不熟悉 setNames,所以也顺便学到了新东西。
猜你喜欢
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 2015-03-12
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
相关资源
最近更新 更多