eat-too-much

简介

我的 sister 想要我爬一些试题给她。一共有 80 套,她不想手工一套一套地点,所以我就来了。这个网站比较简单,所以没有费很大的力气。期间参考了一系列的网站,可惜都没有记录下来。

code

#!/usr/bin/env python
#coding=utf-8

import pdfkit
import time
import requests
import sys
import urllib2
import re

def get_hiddenvalue(url):
    """Fetch *url* and return its hidden form state.

    Args:
        url: address of the page to download with a plain GET request.

    Returns:
        A ``(viewstate, eventvalidation)`` tuple holding the ``value``
        attributes of the page's ``__VIEWSTATE`` and ``__EVENTVALIDATION``
        hidden inputs.

    Raises:
        IndexError: if either hidden input is missing from the page.
        urllib2.URLError: if the page cannot be fetched.
    """
    import contextlib

    request = urllib2.Request(url)
    # urllib2 responses are not context managers in Python 2; closing()
    # guarantees the connection is released even if read() fails.
    with contextlib.closing(urllib2.urlopen(request)) as response:
        page = response.read()
    # Parsing is shared with get_hiddenvalue_string so both entry points
    # always use the same patterns.
    return get_hiddenvalue_string(page)

def get_hiddenvalue_string(myStr):
    """Extract the hidden form state from an already downloaded page.

    Args:
        myStr: HTML text of the page.

    Returns:
        A ``(viewstate, eventvalidation)`` tuple holding the ``value``
        attributes of the first ``__VIEWSTATE`` and ``__EVENTVALIDATION``
        hidden inputs found in *myStr* (matched case-insensitively).

    Raises:
        IndexError: if either hidden input is missing; the message names
            the field that could not be found.
    """
    patterns = (
        ('__VIEWSTATE',
         r'<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="(.*?)" />'),
        ('__EVENTVALIDATION',
         r'input type="hidden" name="__EVENTVALIDATION" id="__EVENTVALIDATION" value="(.*?)" />'),
    )
    values = []
    for field, pattern in patterns:
        match = re.search(pattern, myStr, re.I)
        if match is None:
            # Same exception type as indexing an empty findall() result,
            # but with a message that says which field is absent.
            raise IndexError('hidden field %s not found in page' % field)
        values.append(match.group(1))
    return values[0], values[1]

# Python 2 only: make UTF-8 the implicit str<->unicode codec so the Chinese
# button captions and the downloaded page text can be mixed and written to
# disk without explicit encode/decode calls.
reload(sys)
sys.setdefaultencoding( "utf-8" )

# Form fields sent with every POST; cid/pid are overwritten for each paper.
data = {
    'cid': '1',
    'pid': '5',
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
}
# wkhtmltopdf options passed through pdfkit.
options = {
    'page-size': 'A4',
    'margin-top': '0mm',
    'margin-right': '0mm',
    'margin-bottom': '0mm',
    'margin-left': '0mm',
    # 'orientation': 'Landscape',  # landscape layout
    'encoding': "UTF-8",
    'no-outline': None,
    # 'footer-right': '[page]',  # print page numbers
}
# Output file prefix for each cid (cid 1 -> index 0, and so on).
myMap = ["UniversityPsychology", "HigherEducationRegulations", "HigherEducation", "TeacherEthics"]
confg = pdfkit.configuration(wkhtmltopdf='/usr/local/bin/wkhtmltopdf')

# (form field name, caption) of the three submit buttons, pressed in order.
# The captions are sent to the server verbatim and must not be altered.
submit_buttons = [
    ('Button1', '提交并查看单选题答案'),
    ('Button2', '提交并查看多选题答案'),
    ('Button3', '提交并查看判断题答案'),
]

for i, subject in enumerate(myMap):
    for j in range(20):
        cid = i + 1
        pid = j + 1
        url = 'http://zjzx.zjnu.edu.cn/test/Default.aspx?cid=%d&pid=%d' % (cid, pid)
        pdf_name = '%s_exam%d.pdf' % (subject, pid)
        html_name = '%s_exam%d.html' % (subject, pid)

        data['cid'] = str(cid)
        data['pid'] = str(pid)
        data['drop1'] = '1'
        data['drop2'] = '1'
        # Author's note: look up __VIEWSTATE / __EVENTVALIDATION -- one
        # carries the encoded state of all form controls, the other is used
        # for encryption. Every POST must echo the values of the page it
        # was submitted from.
        data['__VIEWSTATE'], data['__EVENTVALIDATION'] = get_hiddenvalue(url)

        page = None
        for step, (button, caption) in enumerate(submit_buttons):
            if step:
                # Pause between submissions, then pick up the state that the
                # previous response handed back.
                time.sleep(3)
                data['__VIEWSTATE'], data['__EVENTVALIDATION'] = get_hiddenvalue_string(page)
            data[button] = caption
            response = requests.post(url=url, data=data, headers=headers)
            # Fail loudly on an HTTP error instead of parsing an error page.
            response.raise_for_status()
            page = response.text
            # Only one button may be "pressed" per request.
            del data[button]

        # 'w', not 'a': re-running the script must replace the saved page
        # rather than stack a second copy into the HTML (and the PDF).
        with open(html_name, 'w') as html_file:
            html_file.write(page)

        # Render the page reached after the last submission.
        pdfkit.from_string(page, pdf_name, configuration=confg, options=options)

分类:

技术点:

相关文章: