以下代码对我有效:
library(rvest)
# IMDb advanced-search results page to scrape (address used verbatim).
URL <- "https://www.imdb.com/search/title/?title_type=feature&online_availability=US/IMDbTV&start=1251&ref_=adv_nxt"

# Download and parse the page into an HTML document.
webpage <- read_html(URL)

# Select every node matching the CSS selector `span.genre`, then take each
# node's text and strip the leading/trailing whitespace around it.
genre_nodes <- html_nodes(webpage, "span.genre")
genres <- trimws(html_text(genre_nodes))
这会返回 50 个值:
# Auto-print the character vector of scraped genre strings.
genres
# [1] "Comedy, Romance" "Action, Crime, Drama"
# [3] "Action, Horror, Sci-Fi" "Action, Adventure, Thriller"
# [5] "Adventure, Comedy, Family" "Comedy"
# [7] "Action, Adventure, Thriller" "Comedy, Drama, Romance"
# [9] "Comedy" "Comedy"
#[11] "Action, Adventure, Drama" "Action, Thriller"
#[13] "Action, Crime, Thriller" "Mystery, Thriller"
#[15] "Crime, Drama, Thriller" "Drama, Horror"
#[17] "Animation, Drama, War" "Drama, Thriller"
#[19] "Action, Crime, Drama" "Drama, Sci-Fi"
#[21] "Adventure, Comedy, Family" "Crime, Drama"
#[23] "Action, Adventure, Thriller" "Action, Adventure, Sci-Fi"
#[25] "Thriller" "Comedy, Crime"
#[27] "Comedy, Romance" "Action, Biography, Drama"
#[29] "Adventure, Comedy" "Crime, Drama, Thriller"
#[31] "Drama, Sci-Fi, Thriller" "Comedy, Romance"
#[33] "Action, Drama, Thriller" "Action, Adventure, Sci-Fi"
#[35] "Action, Crime, Drama" "Action, Adventure, Drama"
#[37] "Action, Thriller" "Action, Drama, War"
#[39] "Drama, Sci-Fi, Thriller" "Animation, Adventure, Family"
#[41] "Drama, Romance" "Action, Drama, Fantasy"
#[43] "Action, Adventure, Fantasy" "Comedy, Crime, Drama"
#[45] "Action, Crime, Drama" "Action, Adventure, Sci-Fi"
#[47] "Drama, Romance" "Animation, Family, Fantasy"
#[49] "Action, Adventure, Fantasy" "Mystery, Thriller"