【发布时间】:2020-06-29 09:01:03
【问题描述】:
session = HTMLSession()
flag = 0
leap_year =[2016,2020]
month_30 = [4,6,9,11]
for year in range(2015,2020):
date =[]
times = []
temp = []
dew = []
hum =[]
wind=[]
wind_s =[]
wind_g =[]
press = []
precip = []
cond = []
for month in range(1,13):
for day in range(1,32):
if(year in leap_year):
if(month == 2 and day >29):
break
else:
if (month == 2 and day >28):
break
if(month in month_30 and day >30):
break
d = "{}-{}-{}".format(year,month,day)
start2 = time.time()
url = "https://www.wunderground.com/history/daily/ca/richmond/CYVR/date/{}".format(d)
r = session.get(url)
r.html.render(sleep =5)
soup = BeautifulSoup(r.html.html,'html.parser')
table = soup.find("tbody",{'role':'rowgroup'})
row = table.find_all('tr')
for r in row :
n = 0
date.append(d)
for cell in r.find_all('td'):
if(n == 0):
times.append(cell.text)
elif(n == 1):
temp.append(cell.text)
elif(n == 2):
dew.append(cell.text)
elif(n == 3):
hum.append(cell.text)
elif(n == 4):
wind.append(cell.text)
elif(n == 5):
wind_s.append(cell.text)
elif(n == 6):
wind_g.append(cell.text)
elif(n == 7):
press.append(cell.text)
elif(n == 8):
precip.append(cell.text)
elif(n == 9):
cond.append(cell.text)
n+=1
end2 = time.time()
print("it took : {} for one day ".format(end2-start2))
print(d)
end = time.time()
print(end-start)
嗨!我试图用 request_html 抓取网站。我不知道为什么在执行过程中,它突然停止了。有时,它在获取 2016 年 1 月 20 日的数据后立即停止,有时它在尝试获取 2016 年 3 月 20 日的数据时停止。它随机停止。有谁知道为什么? r.html.render 中的超时或休眠是问题吗?
【问题讨论】:
标签: python web-scraping python-requests-html