该网站通过发送 GET 请求获取 JSON 数据,并用这些数据填充表格。经过一番尝试,下面是我写出的用于处理这些 JSON 数据的代码(代码不算漂亮,但确实能用):
library(rjson)
library(rvest)
library(writexl)
# Download the paginated ETF holdings JSON from etfdb.com, strip the HTML
# markup from the text columns, and export the result to "stock.xlsx".

# Number of pages to request and the offset step between consecutive pages.
lastpage <- 9
page_size <- 25
base_url <- paste0(
  "https://etfdb.com/data_set/?tm=40274&cond={%22by_stock%22:25}",
  "&no_null_sort=&count_by_id=true&limit=25&sort=weighting&order=desc",
  "&limit=25&offset="
)

# Fetch one page (1-based index) and return its rows as a data frame, or NULL
# when the response carries no rows. The rows are taken from the second
# element of the parsed JSON.
fetch_page <- function(page_index) {
  response <- fromJSON(file = paste0(base_url, page_size * (page_index - 1)))
  rows <- if (length(response) >= 2) response[[2]] else NULL
  if (length(rows) == 0) {
    # matrix(NULL, ...) is an error, so an empty page must be skipped.
    return(NULL)
  }
  # NOTE(review): assumes every row flattens to the same number of scalar
  # fields; a NULL field would shift the columns. Confirm against the API.
  data.frame(
    matrix(unlist(rows), nrow = length(rows), byrow = TRUE),
    stringsAsFactors = FALSE
  )
}

# Parse each HTML fragment and keep only its trimmed text content.
strip_html <- function(cells) {
  vapply(
    cells,
    function(cell) html_text(read_html(cell), trim = TRUE),
    character(1),
    USE.NAMES = FALSE
  )
}

# Collect all pages first and bind once instead of growing `df` in a loop.
pages <- Filter(Negate(is.null), lapply(seq_len(lastpage), fetch_page))
if (length(pages) == 0) {
  stop("No rows were returned by etfdb.com; nothing to export.", call. = FALSE)
}
df <- do.call(rbind, pages)

# Columns X1, X3 and X5 hold HTML fragments; X7 and X9 are used as-is.
for (html_col in c("X1", "X3", "X5")) {
  df[[html_col]] <- strip_html(df[[html_col]])
}

df <- df[, c("X1", "X3", "X5", "X7", "X9")]
colnames(df) <- c(
  "Ticker", "ETF", "ETFdb.com Category", "Expense Ratio", "Weighting"
)

write_xlsx(
  df,
  path = "stock.xlsx",
  col_names = TRUE,
  format_headers = TRUE,
  use_zip64 = FALSE
)
更新:
您可以在页面中该表格的 data-url 属性里找到数据源地址:
我更新了代码,让您用起来更方便:
library(rjson)
library(rvest)
library(writexl)
# Look up the ETF holdings data source for one stock on etfdb.com, download
# every page of JSON, strip the HTML markup from the text columns, and export
# the result to "stock.xlsx".

stock_ticket <- "AAPL"
url <- paste0("https://etfdb.com/stock/", stock_ticket)

# Number of pages to request and the offset step between consecutive pages.
lastpage <- 9
page_size <- 25

# The data endpoint is read from the `data-url` attribute of table#etfs.
data_url <- read_html(url) %>%
  html_node(xpath = "//table[@id='etfs']") %>%
  html_attr("data-url")
if (is.na(data_url)) {
  # Without this guard the literal text "NA" would be pasted into the URL.
  stop(
    "Could not find a 'data-url' attribute on table#etfs at ", url,
    call. = FALSE
  )
}

# Fetch one page (1-based index) and return its rows as a data frame, or NULL
# when the response carries no rows. The rows are taken from the second
# element of the parsed JSON.
fetch_page <- function(page_index) {
  page_url <- paste0(
    "https://etfdb.com", data_url, "&offset=", page_size * (page_index - 1)
  )
  response <- fromJSON(file = page_url)
  rows <- if (length(response) >= 2) response[[2]] else NULL
  if (length(rows) == 0) {
    # matrix(NULL, ...) is an error, so an empty page must be skipped.
    return(NULL)
  }
  # NOTE(review): assumes every row flattens to the same number of scalar
  # fields; a NULL field would shift the columns. Confirm against the API.
  data.frame(
    matrix(unlist(rows), nrow = length(rows), byrow = TRUE),
    stringsAsFactors = FALSE
  )
}

# Parse each HTML fragment and keep only its trimmed text content.
strip_html <- function(cells) {
  vapply(
    cells,
    function(cell) html_text(read_html(cell), trim = TRUE),
    character(1),
    USE.NAMES = FALSE
  )
}

# Collect all pages first and bind once instead of growing `df` in a loop.
pages <- Filter(Negate(is.null), lapply(seq_len(lastpage), fetch_page))
if (length(pages) == 0) {
  stop("No rows were returned for ", stock_ticket, "; nothing to export.",
       call. = FALSE)
}
df <- do.call(rbind, pages)

# Columns X1, X3 and X5 hold HTML fragments; X7 and X9 are used as-is.
for (html_col in c("X1", "X3", "X5")) {
  df[[html_col]] <- strip_html(df[[html_col]])
}

df <- df[, c("X1", "X3", "X5", "X7", "X9")]
colnames(df) <- c(
  "Ticker", "ETF", "ETFdb.com Category", "Expense Ratio", "Weighting"
)

write_xlsx(
  df,
  path = "stock.xlsx",
  col_names = TRUE,
  format_headers = TRUE,
  use_zip64 = FALSE
)