【问题标题】:How do you extract data from nested JSON data in R?(如何在 R 中从嵌套的 JSON 数据里提取数据?)
【发布时间】:2016-04-14 16:55:01
【问题描述】:

我需要能够从这个包含许多 json 条目的文件中提取这些字段:

sender: hostname
mem: used_p
cpu: user_p
load: load5

//

cat tmp.txt

{"senderDateTimeStamp":"2016-04-07T00:00:00.0093","senderHost":"server1","senderAppcode":"test_infrastats_prod","senderUsecase":"system","destinationTopic":"test_serverstats_realtimedata_topic_prod","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460001606277,"payloadData":{"timestamp":"2016-04-07T00:00:00.093","sender":{"name":"server1","hostname":"server1"},"count":"1","shipper":"server1","mem":{"total":"18855256064","free":"7273639936","actual_used":"3755769856","used_p":"0.6242380717975277","actual_free":"15099486208","used":"11581616128","actual_used_p":"0.2091895439262065"},"cpu":{"steal":"0","idle":"5102727720","system":"16658360","softirq":"13824070","irq":"1659250","system_p":"0.012666049012784248","nice":"32210","iowait":"660220","user_p":"0.18809078763071663","user":"1112770410"},"load":{"load1":"1.54","load15":"1.11","load5":"1.2"},"swap":{"total":"18855256064","free":"1044598784","actual_used":"0","used_p":"0.0","actual_free":"0","used":"11581616128"},"type":"system"},"payloadDataText":null,"key":"test_infrastats_prod:system","destinationTopicName":"test_serverstats_realtimedata_topic_prod","hdfsPath":"test_infrastats_prod/system","esindex":"test_infrastats_prod","estype":"system","appCode":"test_infrastats_prod","useCase":"system"}

{"senderDateTimeStamp":"2016-04-07T00:00:00.0093","senderHost":"server1","senderAppcode":"test_infrastats_prod","senderUsecase":"system","destinationTopic":"test_serverstats_realtimedata_topic_prod","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460001606277,"payloadData":{"timestamp":"2016-04-07T00:00:00.093","sender":{"name":"server1","hostname":"server1"},"count":"1","shipper":"server1","mem":{"total":"18855256064","free":"7273639936","actual_used":"3755769856","used_p":"0.6242380717975277","actual_free":"15099486208","used":"11581616128","actual_used_p":"0.2091895439262065"},"cpu":{"steal":"0","idle":"5102727720","system":"16658360","softirq":"13824070","irq":"1659250","system_p":"0.012666049012784248","nice":"32210","iowait":"660220","user_p":"0.18809078763071663","user":"1112770410"},"load":{"load1":"1.54","load15":"1.11","load5":"1.2"},"swap":{"total":"18855256064","free":"1044598784","actual_used":"0","used_p":"0.0","actual_free":"0","used":"11581616128"},"type":"system"},"payloadDataText":null,"key":"test_infrastats_prod:system","destinationTopicName":"test_serverstats_realtimedata_topic_prod","hdfsPath":"test_infrastats_prod/system","esindex":"test_infrastats_prod","estype":"system","appCode":"test_infrastats_prod","useCase":"system"}

{"senderDateTimeStamp":"2016-04-07T00:00:00.0093","senderHost":"server1","senderAppcode":"test_infrastats_prod","senderUsecase":"system","destinationTopic":"test_serverstats_realtimedata_topic_prod","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460001606277,"payloadData":{"timestamp":"2016-04-07T00:00:00.093","sender":{"name":"server1","hostname":"server1"},"count":"1","shipper":"server1","mem":{"total":"18855256064","free":"7273639936","actual_used":"3755769856","used_p":"0.6242380717975277","actual_free":"15099486208","used":"11581616128","actual_used_p":"0.2091895439262065"},"cpu":{"steal":"0","idle":"5102727720","system":"16658360","softirq":"13824070","irq":"1659250","system_p":"0.012666049012784248","nice":"32210","iowait":"660220","user_p":"0.18809078763071663","user":"1112770410"},"load":{"load1":"1.54","load15":"1.11","load5":"1.2"},"swap":{"total":"18855256064","free":"1044598784","actual_used":"0","used_p":"0.0","actual_free":"0","used":"11581616128"},"type":"system"},"payloadDataText":null,"key":"test_infrastats_prod:system","destinationTopicName":"test_serverstats_realtimedata_topic_prod","hdfsPath":"test_infrastats_prod/system","esindex":"test_infrastats_prod","estype":"system","appCode":"test_infrastats_prod","useCase":"system"}

我可以像这样在 payloadData 部分中提取数据:

# Read every line of c:/tmp.txt, join the lines with commas inside [ ] so the
# text is handed to fromJSON() as one JSON array, then keep only the
# "timestamp" and "count" columns of the payloadData element.
df <- jsonlite::fromJSON(paste0("[",paste0(readLines("c:/tmp.txt"),collapse=","),"]"))$payloadData[c("timestamp","count")]

但是payloadData部分中有嵌套对象,我如何从mem,cpu,load部分中提取json数据中嵌套部分的数据?

【问题讨论】:

  • 如果我没记错的话,fromJSON 有一个 flatten 参数,它会把所有嵌套的值展开到顶层,这样就可以用常规的取子集方法来访问。(我现在在家,如果到时还没有人回答,我明天再确认。)
  • @Tensibai,感谢 flatten=TRUE 解决了我的问题。

标签: json r jsonlite


【解决方案1】:

一种方法是使用tidyjson:

library(tidyjson)
library(magrittr)

json <- '{"senderDateTimeStamp":"2016-04-07T00:00:00.0093","senderHost":"server1","senderAppcode":"test_infrastats_prod","senderUsecase":"system","destinationTopic":"test_serverstats_realtimedata_topic_prod","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460001606277,"payloadData":{"timestamp":"2016-04-07T00:00:00.093","sender":{"name":"server1","hostname":"server1"},"count":"1","shipper":"server1","mem":{"total":"18855256064","free":"7273639936","actual_used":"3755769856","used_p":"0.6242380717975277","actual_free":"15099486208","used":"11581616128","actual_used_p":"0.2091895439262065"},"cpu":{"steal":"0","idle":"5102727720","system":"16658360","softirq":"13824070","irq":"1659250","system_p":"0.012666049012784248","nice":"32210","iowait":"660220","user_p":"0.18809078763071663","user":"1112770410"},"load":{"load1":"1.54","load15":"1.11","load5":"1.2"},"swap":{"total":"18855256064","free":"1044598784","actual_used":"0","used_p":"0.0","actual_free":"0","used":"11581616128"},"type":"system"},"payloadDataText":null,"key":"test_infrastats_prod:system","destinationTopicName":"test_serverstats_realtimedata_topic_prod","hdfsPath":"test_infrastats_prod/system","esindex":"test_infrastats_prod","estype":"system","appCode":"test_infrastats_prod","useCase":"system"}'

# Enter the "payloadData" object of the record, then spread one value from
# each nested object (sender, mem, cpu, load) into its own string column.
spread_values(
  enter_object(json, "payloadData"),
  send_host = jstring("sender", "hostname"),
  mem_used_p = jstring("mem", "used_p"),
  cpu_user_p = jstring("cpu", "user_p"),
  load_load_5 = jstring("load", "load5")
)

#   document.id send_host         mem_used_p          cpu_user_p load_load_5
# 1           1   server1 0.6242380717975277 0.18809078763071663         1.2

或者,您可以堆叠每组键:

# Enter the "payloadData" object once; each section below starts from it.
payload <- enter_object(json, "payloadData")

# For each nested section: enter the object, gather its keys, and append the
# corresponding values as strings.
# NOTE(review): later tidyjson releases reportedly rename gather_keys() to
# gather_object() — confirm against the installed version.
sender_keys <- append_values_string(
  gather_keys(enter_object(payload, "sender"))
)

mem_keys <- append_values_string(
  gather_keys(enter_object(payload, "mem"))
)

cpu_keys <- append_values_string(
  gather_keys(enter_object(payload, "cpu"))
)

load_keys <- append_values_string(
  gather_keys(enter_object(payload, "load"))
)

【讨论】:

  • 感谢您的回答。还有一点:我需要读取一批文件,文件中的条目就是我贴出的那种 json 数据。您能否修改代码,改为从文件读取,并把每一行当作一条 json 数据来处理?
  • 简短的回答:“不”。稍长的回答:这不是你原来提的问题;如果这才是你的问题,你应该编辑原问题或另提一个新问题。最详细的回答:只需把上面的结果保存到一个变量(例如 row)中,然后把每个文件得到的行绑定在一起。你可以使用循环或某个 apply 系列函数。
猜你喜欢
  • 2021-10-01
  • 2019-09-16
  • 1970-01-01
  • 2020-06-28
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 2022-07-12
  • 2019-10-06
相关资源
最近更新 更多