【发布时间】:2018-09-12 20:06:28
【问题描述】:
声明:- 我正在尝试抓取该网址。我想用代码中设置的参数发送 POST 请求,然后获取返回的数据。
问题:- 我实际得到的只是原始 .aspx 页面的 HTML,我在“formFields”中设置的参数并没有生效。谁能解释一下我哪里出错了?
import urllib
import urllib2
# Replay the form POST that the "Search" button on userview.aspx performs and
# save the HTML the server returns to temp.htm.
uri = 'http://cbseaff.nic.in/cbse_aff/schdir_Report/userview.aspx'

headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    # This header describes the *request* body, which is a URL-encoded form.
    # With 'text/html' the server does not parse the body as form fields and
    # simply renders the initial page, so the posted values were ignored.
    'Content-Type': 'application/x-www-form-urlencoded',
}

# NOTE(review): __VIEWSTATE / __EVENTVALIDATION are hard-coded snapshots of one
# page load; presumably the server may reject them once they go stale. If the
# POST still returns the plain page, GET the page first and copy the current
# hidden-field values from its HTML -- TODO confirm against the live site.
formFields = (
    (r'__VIEWSTATE', r'yE41BXTcCz0TfHmpBjw1lXLQe3PFLLTPmxSVU0hSeOt6wKGfqvehmJiLNmLj2VdBWrbXtmUIr0bh8lgw8X8f/yn9T1D4zmny5nUAc5HpgouURKeWzvZasUpmJUJ8pgh4jTUo62EVRQCYkXKayQVbCwaSP81BxDO9gxrERvzCUlw8i76A4jzlleSSunjr844drlsOw/PxjgYeRZCLm/h8WAc5HZrJ+w7vLMyLxlY/mDQaYdkVAF/s4lAJAxGfnX1rlshirdphBhI1tZIuoJa+ZTNzizgrXi70PVnAR3cw0QhCWr2rrTkrvoJ+rI5pme0pYPAX+CZfmSH3Cg1BKEbY/+G+p1AsLRqsobC8EBQXHPicqnhgOR7/kx+Z54XyCzxDwXCZBFKl3npcSq4xJ2Ebi3PFS6FtT0K+wZTZ8XGCQUVudzKyqhfDxQ4UTpDWn4vR7LIF765qhXpRmNG6HCzbvgrLqNrBzt+PZ0mbpeLsIWEia5L/AIbN+zcLiNsBxTBA9zOsljZDPkbL1rWo+WDUwBfDRiu6X4ru+RKypczFFzoUUciCaWD2ciOq+//NYa7NEwZ9d7YRY/LfEhszYUJO72Xpzaotxu7+7RdGVvsxzrh1Ro8OHqoskesX6QlEdqjakgrk3yqaroXCfnnBJu1ExKYXrU6JuVFbOGz66CZfkqiJJSVHs2Ozfos/yPWWiJVyETKtPQF003o9GGKKIFIaQ6YRHoco3jVOKB+NIE/VxMJHiSzPuuExZ27MY/pX9U9rx5/4az6i/LIqYoRzQilJT7lN5eTIdVn/W5Uep0tvKtOKhR7JjX7WhO7QpROlOh7H/wbCdr5u/jXB5FpJLB1Y8oZSdVyKJK8Xx+tODFvxYxMve5NT+/fBwlMSfkh7+h7+5Zo5hHKwLF01mrS52KHehix4Ja5bQ3hy6Q2r6pz+ZVHvUsev9OpsZlO1+PPjVCbdjzeUX23At6R4BRm6Zq/o0knl2Xf/kvM6WtKtDi+AbpIa7Eyj+brqWMdDKW4AiOLGS45H4ERP2z1CeVwiXlfa22JhkPbz8GJf9J9iey+JefnmsgD5ScdcvPU1mb1i7NLv9QOYXZXZwJXhGHZcw65iQm7vrZB5sJlBp7agBhhwX2KNaKZguNGVGhlxiS84zoKrkvdBv7e52n6H9p3okMvqHR+yEe+UCuDPanO+9tTvNvOqBzEAVvPYIK80YWsuDY3R66OBPjQEKpbPrDpz5hoMKk59r1FiIq6Jd71c6QeE57Au3ei72GZEVzaLXAva0RJP/tSROnO7ZKHkvxuP0oayVlzjLuIHnO0o4zUsoHpTJqPa20Bxv9De3JwOOi8HJgYj+dZjdRIDT9zHhgbLV9UO4z0HHyK54RIS67OAS8WqMYyfdC5I5GGwy8rosVKNjCfHymMEUzbs5iHCPhrM/X0UMJTxQ7yq113/6U43p6BP4PqP/OAgRYxejrVtT9goPKWxHTwu0kDROXCVvqHo5SiQ+/X3DdTxLF+13p0k5xlXBk0qkeLJkNlSYBeTOgPyvjHxnSMUdjhjHtiA0oFCSSCYCpVU9Pe3PLQyyUjv+KhI/jWS94D3KxYqXjyHUC/nMxEwot65kzFE/scAoOsdk/MJS/uZw4PbfbGEVKWTcJLtOV8s3wHKPzmB/AexZ//iEmDv'),
    (r'__VIEWSTATEGENERATOR', 'AC872EDD'),
    (r'__EVENTVALIDATION', r'35e8G73FpRBmb1p/I2BD+iKRsX5vU8sS0eAo0dx/dcp4JGg0Jp+2ZLyg8GHhCmrVcGjQVBABi8oOAgXt+2OghWX0FyVvk0fo8XPTIL/uMJjSBFrRvgsQNnh/Ue1VCOGhBgwhmrBt+VbjmmkA3hafVR2lAVjy/RYswgFcmyloWPyxQDu9UuktSiCNfyfJcF/xZrJDTeHBkNmB9P8QGNHLVnoaZjsmks/yB+Nll5ZLiq0WvDRhsbq1I/rrRWytnleHAMNutsEqdU9OvycQ/insfM871Jww+TyIvmxEVRYcAH6MnYl0nqUaderHN4R37aH8CH8B/NUxGDYLSdlsvJGJxXEZh9EVzzDprJuz7sJUxolBhjv6YNfJDAThrLCip2QEY20SztPZ/j8KnWgnX7Xs6sxjZofKnQxNlB44dZG0okIPitb9zjWB6kC2xDmN69vfDayDOCkcPJG3q/HMP6ewQvV/YheqUbuBwC77WPIKXrc='),
    # Values are given un-encoded: urlencode() applies quote_plus() itself, so
    # a literal '+' here would be sent as '%2B' instead of a space.
    (r'ctl00$ContentPlaceHolder1$optlist', 'State Wise'),
    (r'ctl00$ContentPlaceHolder1$ddlitem', '22'),
    (r'ctl00$ContentPlaceHolder1$optlistChoice', 'List of All Schools'),
    (r'ctl00$ContentPlaceHolder1$search', 'Search'),
    (r'__EVENTTARGET', 'ctl00$ContentPlaceHolder1$search'),
)

encodedFields = urllib.urlencode(formFields)
# Passing a data argument makes urllib2 issue a POST instead of a GET.
req = urllib2.Request(uri, encodedFields, headers)
f = urllib2.urlopen(req)
try:
    try:
        # Binary mode: the response body is written exactly as received.
        fout = open('temp.htm', 'wb')
    except IOError:
        print('Could not open output file\n')
    else:
        # Only write when the file was actually opened; the original fell
        # through to fout.writelines() and hit a NameError on failure.
        try:
            fout.write(f.read())
        finally:
            fout.close()
finally:
    f.close()
【问题讨论】:
标签: python http web-scraping