【发布时间】:2018-12-27 10:59:24
【问题描述】:
我正在尝试使用 RSelenium 从网站上抓取数据。我可以单独浏览下拉菜单,但是当我在循环中运行它们时会出错。
此外,在选择下拉列表中的所有值后,我想将设施名称和联系方式存储在表格中。到目前为止,我无法做到这一点。
rm(list=ls())
setwd("D:\\work_codes\\kvk\\data")
getwd()
library(RSelenium)
library(rvest)
library(XML)
library(RCurl)
library(magrittr)
library(stringr)
rd<-rsDriver()
remDr<-rd[["client"]]
remDr$navigate("https://kvk.icar.gov.in/facilities_list.aspx")
remDr$refresh()
stateEle<-remDr$findElement("id", "ContentPlaceHolder1_ddlState")
states<-stateEle$getElementText()[[1]] %>% strsplit(., '\\n') %>% unlist %>% setdiff(., ' --Select--')
states<-str_trim(states, 'left')
stateEle$clickElement()
for (i in 1:length(states)) {
remDr$refresh()
stateEle$clickElement()
stateEle$sendKeysToElement(list(states[i]))
stateEle$clickElement()
districts<-NULL
distEle<-remDr$findElement("id", "ContentPlaceHolder1_ddlDistrict")
districts<-distEle$getElementText()[[1]] %>% strsplit(., '\\n') %>% unlist %>% setdiff(., ' --Select--')
districts<-str_trim(districts, 'left')
for (j in 1:length(districts)) {
distEle$clickElement()
distEle$sendKeysToElement(list(districts[j]))
distEle$clickElement()
kvk<-NULL
kvkEle<-remDr$findElement("id", "ContentPlaceHolder1_ddlKvk")
kvk<-kvkEle$getElementText()[[1]] %>% strsplit(., '\\n') %>% unlist %>% setdiff(., ' --Select--')
kvk<-str_trim(kvk, 'left')
for (k in 1:length(kvk)) {
kvkEle$clickElement()
kvkEle$sendKeysToElement(list(kvk[[1]]))
kvkEle$clickElement()
submitEle<-remDr$findElement("id", "ContentPlaceHolder1_btnSubmit")
submitEle$clickElement()
doc<-remDr$findElement('id', 'ContentPlaceHolder1_rptfacility_f_name_1')
doc$getElementText()
doc$clickElement()
remDr$findElement('class name','Contact details:')
}
}
}
【问题讨论】: