【问题标题】:Adjusting Yahoo Stock Data Web Scraping to Loop over Dates调整雅虎股票数据网络抓取以循环日期
【发布时间】:2019-02-22 21:25:49
【问题描述】:

我正在使用与 here 相似的脚本。简而言之,代理问题(或其他问题)会使 API 超时,所以我不得不直接解析雅虎财经历史数据页面的网址,而不是使用 quantmod 来获取历史股票数据。由于 Yahoo Finance 在您向下滚动页面之前只加载 100 行(即使您设置的日期范围超过 100 天),我需要让这个 for 循环逐一遍历我创建的、每段 100 天的日期区间列表。开始和结束日期采用 Yahoo Finance 使用的整数格式。

以下是 100 天增量的示例 df,列表将更改/增长。证券列表也是从文件中导入的,并且也会动态更改,但我在下面提供了“符号”作为示例。

在下面的代码中,我希望 dateGroup[1,1] 和 dateGroup[1,2] 能自动依次取 dateGroup 的第一行的值、然后第二行的值,依此类推,最后构建一个包含所有值的数据框。

# Date windows to request, one row per window. `start` and `end` are in the
# integer date format that Yahoo Finance uses in its history URLs
# (they look like Unix epoch seconds -- TODO confirm).
dateGroup <- data.frame(
    start = c(1509519600, 1518159600,1526799600,1535439600,1544079600),
    end = c(1518073200, 1526713200,1535353200,1543993200,1550732400)
)

# For every ticker in `symbols`, download its Yahoo Finance history page, parse
# the first HTML table into a data frame, tag it with the ticker, and store it
# in a variable named after the ticker.
# NOTE(review): only the first row of dateGroup (dateGroup[1,1] / dateGroup[1,2])
# is ever used, so the remaining date windows are never requested.
# NOTE(review): htmlTreeParse/getNodeSet/readHTMLTable presumably come from the
# XML package -- confirm it is loaded before this runs.
for (s in symbols){
    # Build the history URL for this ticker and the first date window.
    url <- paste('https://finance.yahoo.com/quote/',s, '/history?period1=',dateGroup[1,1],'&period2=',dateGroup[1,2],'&interval=1d&filter=history&frequency=1d',sep="")
    webpage <- readLines(url,warn=FALSE)
    html <- htmlTreeParse(webpage, useInternalNodes = TRUE, asText = TRUE)
    # All <table> nodes on the page; only the first one is read below.
    tableNodes <- getNodeSet(html, "//table")
    # Create a variable whose name is the ticker and whose value is the table.
    assign(s, readHTMLTable(tableNodes[[1]], header=c("Date","Open","High","Low","Close","Adj. Close","Volume")))

    # Read the table back, add the ticker as a `symbol` column, and store it again.
    df <- get(s)
    df['symbol'] <- s
    assign(s, df)
}

# Gather the per-symbol data frames (variables named after each ticker) into a
# list. The names are dropped so that rbind() keeps plain row names instead of
# prefixing them with the ticker.
symboldatalist <- unname(mget(symbols))

# Stack all per-symbol data frames into one data frame.
symboldata <- do.call(rbind, symboldatalist)

# Move the last column (symbol) to the front. seq_len() is used because
# `1:n-1` parses as `(1:n) - 1`, i.e. 0:(n-1), which only worked by accident
# (R silently drops index 0).
n_cols <- ncol(symboldata)
symboldata <- symboldata[, c(n_cols, seq_len(n_cols - 1))]

# Write the combined table as comma-separated text; "[Location]" is a
# placeholder for the real output path.
write.table(symboldata, "[Location]", sep=",", row.names=FALSE, col.names=TRUE)

任何帮助都会很棒。谢谢!

我尝试了几种办法。我试过创建一个 URL 矩阵(顶部是股票代码,第 1 列和第 2 列是日期),然后逐个抓取这些 URL。我还尝试编写脚本让 Yahoo Finance 页面自动向下滚动,但遇到了同样的错误:请求超时。

【问题讨论】:

    标签: r web-scraping stock


    【解决方案1】:

    考虑使用 mapply 或其不做简化的包装函数 Map,按元素遍历开始日期、结束日期与对应股票代码的配对。此外,避免使用 assign/get,而是构建一个数据框列表,最后用一次 rbind 合并:

    library(XML)
    ...
    # Date windows to request: one row per (start, end) pair. The values are
    # inserted into the Yahoo Finance URL as period1 / period2.
    dateGroup <- data.frame(
        start = c(1509519600, 1518159600, 1526799600, 1535439600, 1544079600),
        end = c(1518073200, 1526713200, 1535353200, 1543993200, 1550732400)
    )
    
    # CROSS JOIN ALL SYMBOLS WITH EACH DATE PAIRING
    # `symbols` must already be defined at this point. The result has the
    # columns start, end and symbols, one row per symbol / date-window pair.
    dt_grp_sym <- merge(dateGroup, data.frame(symbols))
    
    # Download and parse one Yahoo Finance price-history page.
    #
    # Args:
    #   sym: ticker symbol inserted into the quote URL.
    #   sd:  start of the date window, sent as `period1`.
    #   ed:  end of the date window, sent as `period2`.
    #
    # Returns:
    #   A data frame read from the first <table> on the page, using the column
    #   headers listed below, plus a `symbol` column holding `sym`.
    #
    # Stops with an explanatory error when the page contains no <table>.
    proc_html <- function(sym, sd, ed) {
        # paste0() would render round timestamps such as 1500000000 as "1.5e+09",
        # which breaks the URL, so format them in fixed notation first.
        as_plain <- function(x) format(x, scientific = FALSE, trim = TRUE)

        url <- paste0('https://finance.yahoo.com/quote/', sym, '/history?period1=',
                      as_plain(sd), '&period2=', as_plain(ed),
                      '&interval=1d&filter=history&frequency=1d')
        print(url)

        webpage <- readLines(url, warn=FALSE)
        html <- htmlTreeParse(webpage, useInternalNodes = TRUE, asText = TRUE)
        tableNodes <- getNodeSet(html, "//table")

        # Fail with a clear message instead of "subscript out of bounds" when
        # the page has no table at all.
        if (length(tableNodes) == 0) {
            stop("No <table> found for symbol '", sym, "' at ", url, call. = FALSE)
        }

        # Only the first table on the page is read.
        html_df <- transform(readHTMLTable(tableNodes[[1]],
                                           header=c("Date", "Open", "High", "Low",
                                                    "Close", "Adj. Close", "Volume")),
                             symbol = sym)
        html_df
    }
    
    # Call proc_html once per row of dt_grp_sym (one symbol / date-window pair)
    df_list <- with(dt_grp_sym, Map(proc_html, symbols, start, end))

    # Stack the per-request data frames into a single data frame
    final_df <- do.call(rbind, df_list)
    

    演示:使用美国一级(Class I)铁路公司的股票代码:

    # Tickers used for the demonstration
    symbols <- c("UNP", "CSX", "NSC", "CNI", "KSU")

    # Date windows: one (start, end) pair per row
    dateGroup <- data.frame(
        start = c(1509519600, 1518159600, 1526799600, 1535439600, 1544079600),
        end = c(1518073200, 1526713200, 1535353200, 1543993200, 1550732400)
    )

    # Pair every symbol with every date window
    dt_grp_sym <- merge(dateGroup, data.frame(symbols = symbols))

    # Run proc_html for each pairing, then stack the results
    df_list <- Map(proc_html, dt_grp_sym$symbols, dt_grp_sym$start, dt_grp_sym$end)
    final_df <- do.call(rbind, df_list)
    

    输出

    # Show the first rows (head) of final_df separately for each symbol
    by(final_df, final_df$symbol, head)
    
    # final_df$symbol: CNI
    #              Date  Open  High   Low Close Adj..Close    Volume symbol
    # 998  Feb 08, 2018 76.08 76.16 74.11 74.45      72.79 1,508,100    CNI
    # 999  Feb 07, 2018 76.86 77.23 76.01 76.17      74.48 1,645,400    CNI
    # 1000 Feb 06, 2018 76.21 77.42 74.81 77.14      75.42 2,293,300    CNI
    # 1001 Feb 05, 2018 78.00 78.70 77.12 77.17      75.45 1,711,000    CNI
    # 1002 Feb 02, 2018 79.17 79.24 78.17 78.46      76.71 1,331,400    CNI
    # 1003 Feb 01, 2018 79.91 80.54 79.24 79.82      78.04 1,231,500    CNI
    # ------------------------------------------------------------------------------ 
    # final_df$symbol: CSX
    #             Date  Open  High   Low Close Adj..Close     Volume symbol
    # 333 Feb 08, 2018 52.91 53.16 50.46 50.47      49.80  7,798,100    CSX
    # 334 Feb 07, 2018 53.38 54.36 52.94 52.97      52.26  6,496,200    CSX
    # 335 Feb 06, 2018 51.27 54.00 50.12 53.82      53.10 10,563,700    CSX
    # 336 Feb 05, 2018 54.89 55.04 51.96 51.99      51.30  9,070,200    CSX
    # 337 Feb 02, 2018 56.19 56.35 55.20 55.25      54.51  9,275,800    CSX
    # 338 Feb 01, 2018 56.10 57.10 56.04 56.58      55.83  4,079,100    CSX
    # ------------------------------------------------------------------------------ 
    # final_df$symbol: KSU
    #              Date   Open   High    Low  Close Adj..Close    Volume symbol
    # 1330 Feb 08, 2018 107.17 107.64 103.50 103.53     102.15 1,434,600    KSU
    # 1331 Feb 07, 2018 106.59 108.27 106.59 107.10     105.67 1,326,800    KSU
    # 1332 Feb 06, 2018 103.11 108.02 102.07 107.32     105.89 1,459,400    KSU
    # 1333 Feb 05, 2018 109.73 110.44 105.12 105.18     103.77 1,272,100    KSU
    # 1334 Feb 02, 2018 112.06 112.85 110.03 110.15     108.68 1,051,900    KSU
    # 1335 Feb 01, 2018 112.80 114.00 112.17 112.87     111.36 1,011,200    KSU
    # ------------------------------------------------------------------------------ 
    # final_df$symbol: NSC
    #             Date   Open   High    Low  Close Adj..Close    Volume symbol
    # 665 Feb 08, 2018 142.62 143.27 136.87 136.89     134.22 2,657,200    NSC
    # 666 Feb 07, 2018 142.09 144.45 141.37 142.68     139.89 1,464,500    NSC
    # 667 Feb 06, 2018 136.99 143.45 134.55 143.05     140.26 2,455,000    NSC
    # 668 Feb 05, 2018 144.74 146.73 138.18 138.61     135.90 2,508,900    NSC
    # 669 Feb 02, 2018 147.15 147.85 144.61 145.03     142.20 1,774,600    NSC
    # 670 Feb 01, 2018 149.28 150.35 147.90 148.47     145.57 1,427,000    NSC
    # ------------------------------------------------------------------------------ 
    # final_df$symbol: UNP
    #           Date   Open   High    Low  Close Adj..Close     Volume symbol
    # 1 Feb 08, 2018 128.70 128.70 124.81 124.86     122.27  6,325,100    UNP
    # 2 Feb 07, 2018 130.34 131.82 128.94 128.96     126.29  5,053,000    UNP
    # 3 Feb 06, 2018 122.28 131.50 121.50 131.15     128.43 15,734,300    UNP
    # 4 Feb 05, 2018 128.59 131.78 124.13 124.14     121.57  6,744,400    UNP
    # 5 Feb 02, 2018 131.66 131.73 127.22 129.36     126.68  8,181,200    UNP
    # 6 Feb 01, 2018 132.51 133.74 131.86 132.38     129.64  5,597,600    UNP
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2015-06-11
      • 1970-01-01
      相关资源
      最近更新 更多