【发布时间】:2021-12-27 05:23:03
【问题描述】:
我正在尝试使用下面包含的网站在 Zillow 的两个页面之间抓取大约 54 个“代理列表”和 11 个“其他列表”,但我的代码仅在第一个生成“代理列表”的前 20 个结果搜索结果页面。如何修改我的代码以获取“代理列表”和“其他列表”的所有页面上的所有结果?
res_all <-NULL
for (page_result in 1:40) {
zillow_url = paste0("https://www.zillow.com/providence-ri/duplex/?searchQueryState=%7B%22pagination%22%3A%7B%7D%2C%22usersSearchTerm%22%3A%22Providence%2C%20RI%22%2C%22mapBounds%22%3A%7B%22west%22%3A-71.48892251635742%2C%22east%22%3A-71.36017648364258%2C%22south%22%3A41.77131876826507%2C%22north%22%3A41.862664689400106%7D%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A26637%2C%22regionType%22%3A6%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22sort%22%3A%7B%22value%22%3A%22globalrelevanceex%22%7D%2C%22ah%22%3A%7B%22value%22%3Atrue%7D%2C%22sf%22%3A%7B%22value%22%3Afalse%7D%2C%22tow%22%3A%7B%22value%22%3Afalse%7D%2C%22con%22%3A%7B%22value%22%3Afalse%7D%2C%22apco%22%3A%7B%22value%22%3Afalse%7D%2C%22land%22%3A%7B%22value%22%3Afalse%7D%2C%22apa%22%3A%7B%22value%22%3Afalse%7D%2C%22manu%22%3A%7B%22value%22%3Afalse%7D%7D%2C%22isListVisible%22%3Atrue%2C%22mapZoom%22%3A13%7D")
zpg = read_html(zillow_url)
zillow_pg <-tibble(
addr = zpg %>% html_nodes(".list-card-addr") %>% html_text(),
price = zpg %>% html_nodes(".list-card-price") %>% html_text(),
details = zpg %>% html_nodes(".list-card-details") %>% html_text() ,
heading= zpg %>% html_nodes(".list-card-info a") %>% html_text() ,
type = zpg %>% html_nodes(".list-card-statusText") %>% html_text())
res_all <- distinct(bind_rows(res_all, zillow_pg))
}
【问题讨论】:
标签: r web-scraping rvest