from BeautifulSoup import BeautifulSoup
import re
# Extract one day's modules from a saved timetable page and write them,
# each preceded by its start time, to output.html.

# Day whose row is extracted; must match the text of the row's label cell.
day = 'Thu'

# A cell that holds a module is recognised by rowspan="1" in its markup.
module = re.compile(r'rowspan=\"1\"')

# A module's length is read from the colspan attribute of its cell:
# one column per half-hour slot of the `times` axis.
# 1.5 hours
perlen15 = re.compile(r'colspan=\"3\"')
# 2 hours
perlen2 = re.compile(r'colspan=\"4\"')
# 2.5 hours, and so on in half-hour steps
perlen25 = re.compile(r'colspan=\"5\"')
perlen3 = re.compile(r'colspan=\"6\"')
perlen35 = re.compile(r'colspan=\"7\"')
perlen4 = re.compile(r'colspan=\"8\"')

# (pattern, number of half-hour slots covered), tried in this order.
span_steps = (
    (perlen15, 3),
    (perlen2, 4),
    (perlen25, 5),
    (perlen3, 6),
    (perlen35, 7),
    (perlen4, 8),
)
# A module cell matching none of the patterns above is counted as 1 hour.
default_module_step = 2

# Start times of the half-hour slots; corresponds to the first row of the
# timetable.
times = ['8:00', '8:30', '9:00', '9:30', '10:00', '10:30', '11:00', '11:30',
         '12:00', '12:30', '13:00', '13:30', '14:00', '14:30', '15:00',
         '15:30']

# Read the full timetable HTML.
with open('timetable.txt', 'rt') as input_file:
    html = input_file.read()
soup = BeautifulSoup(html)

# Locate the label text of the requested day, then climb two levels to the
# element whose following siblings are walked below.
daytext = soup.find(text=day)
if daytext is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise ValueError('day %r not found in timetable.txt' % day)
starttag = daytext.parent.parent
nexttag = starttag

# Alternating [start time, cell, start time, cell, ...] for the chosen day.
row = []
# Index into `times` of the slot at which the current cell starts.
curmv = 0

# Walk the cells of the row. The walk ends when the row runs out of siblings
# (the closing </tr> is reached) or when the cursor moves past the last known
# time slot -- not after a fixed number of iterations, which raised
# AttributeError/IndexError on rows with fewer cells or longer modules.
while curmv < len(times):
    # Advance two siblings per step, exactly as the original loop did
    # (presumably to skip the whitespace text node between cells -- verify
    # against the actual markup).
    separator = nexttag.nextSibling
    if separator is None:
        break
    candidate = separator.nextSibling
    if candidate is None:
        break
    nexttag = candidate

    # Render the cell once and reuse the markup for every pattern.
    markup = repr(nexttag)
    if module.search(markup):
        row.append(times[curmv])
        row.append(nexttag)
        for pattern, step in span_steps:
            if pattern.search(markup):
                curmv += step
                break
        else:
            curmv += default_module_step
    else:
        # Empty cell: occupies a single half-hour slot.
        curmv += 1

# Write the collected times and cells to the output HTML file.
with open('output.html', 'wt') as output_file:
    output_file.writelines(str(e) for e in row)
如您所见,代码可以区分 1 小时和 2 小时的讲座以及 1.5、2.5 小时的讲座等。
我现在唯一的问题是第 32 行:我需要一种更好的方法来告诉代码停止在表格中水平移动,也就是知道何时停止 for 循环(在之前的代码中,我用的是 while x < 18:,但它仅适用于星期一,因为该行中有 18 个 td 标签)。当循环到达父级 </tr> 标签时,如何让循环停止?
谢谢!
编辑:如果我把 times 列表一直延长到 18:00,就会出现错误;我打算尝试用 try 和 except 块来捕获这个错误。
EDIT2:成功了! :D