【发布时间】:2020-09-23 04:01:25
【问题描述】:
我正在使用此代码循环访问多个 url 以抓取数据。该代码可以正常工作,直到遇到缺少数据的日期。这是弹出的错误信息:
data.frame(away, home, away1H, home1H, awayPinnacle, homePinnacle) 中的错误: 参数意味着不同的行数:7、8
我对编码非常陌生,尽管缺少数据,但我不知道如何让它继续抓取。
library(rvest)
library(dplyr)
get_data <- function(date) {
# Specifying URL
url <- paste0('https://classic.sportsbookreview.com/betting-odds/nba-basketball/money-line/1st-half/?date=', date)
# Reading the HTML code from website
oddspage <- read_html(url)
# Using CSS selectors to scrape away teams
awayHtml <- html_nodes(oddspage,'.eventLine-value:nth-child(1) a')
#Using CSS selectors to scrape 1Q scores
away1QHtml <- html_nodes(oddspage,'.current-score+ .first')
away1Q <- html_text(away1QHtml)
away1Q <- as.numeric(away1Q)
home1QHtml <- html_nodes(oddspage,'.score-periods+ .score-periods .current-score+ .period')
home1Q <- html_text(home1QHtml)
home1Q <- as.numeric(home1Q)
#Using CSS selectors to scrape 2Q scores
away2QHtml <- html_nodes(oddspage,'.first:nth-child(3)')
away2Q <- html_text(away2QHtml)
away2Q <- as.numeric(away2Q)
home2QHtml <- html_nodes(oddspage,'.score-periods+ .score-periods .period:nth-child(3)')
home2Q <- html_text(home2QHtml)
home2Q <- as.numeric(home2Q)
#Creating First Half Scores
away1H <- away1Q + away2Q
home1H <- home1Q + home2Q
#Using CSS selectors to scrape scores
awayScoreHtml <- html_nodes(oddspage,'.first.total')
awayScore <- html_text(awayScoreHtml)
awayScore <- as.numeric(awayScore)
homeScoreHtml <- html_nodes(oddspage, '.score-periods+ .score-periods .total')
homeScore <- html_text(homeScoreHtml)
homeScore <- as.numeric(homeScore)
# Converting away data to text
away <- html_text(awayHtml)
# Using CSS selectors to scrape home teams
homeHtml <- html_nodes(oddspage,'.eventLine-value+ .eventLine-value a')
# Converting home data to text
home <- html_text(homeHtml)
# Using CSS selectors to scrape Away Odds
awayPinnacleHtml <- html_nodes(oddspage,'.eventLine-consensus+ .eventLine-book .eventLine-book-value:nth-child(1) b')
# Converting Away Odds to Text
awayPinnacle <- html_text(awayPinnacleHtml)
# Converting Away Odds to numeric
awayPinnacle <- as.numeric(awayPinnacle)
# Using CSS selectors to scrape Pinnacle Home Odds
homePinnacleHtml <- html_nodes(oddspage,'.eventLine-consensus+ .eventLine-book .eventLine-book-value+ .eventLine-book-value b')
# Converting Home Odds to Text
homePinnacle <- html_text(homePinnacleHtml)
# Converting Home Odds to Numeric
homePinnacle <- as.numeric(homePinnacle)
# Create Data Frame
df <- data.frame(away,home,away1H,home1H,awayPinnacle,homePinnacle)
}
date_vec <- sprintf('201902%02d', 02:06)
all_data <- do.call(rbind, lapply(date_vec, get_data))
View(all_data)
【问题讨论】:
-
哪个日期有数据缺失?
标签: r web-scraping