一、流程分析
第一步:获取登录页,拿到 X_Anti_Forge_Token、X_Anti_Forge_Code
  1、请求url:https://passport.lagou.com/login/login.html
  2、请求方式:GET
  3、请求头:
     - Cookie:由 session 自动处理
     - User-Agent:Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36Name

第二步:登录
  1、请求url:https://passport.lagou.com/login/login.json
  2、请求方式:POST
  3、请求头:
     Cookie
     User-Agent
     Referer:https://passport.lagou.com/login/login.html
     X-Anit-Forge-Code:53165984
     X-Anit-Forge-Token:3b6a2f62-80f0-428b-8efb-ef72fc100d78
     X-Requested-With:XMLHttpRequest
  4、请求体:
     isValidate:true
     username:18611453110
     password:70621c64832c4d4d66a47be6150b4a8e
     request_form_verifyCode:''
     submit:''

第三步:授权
  1、请求url:https://passport.lagou.com/grantServiceTicket/grant.html
  2、请求方式:GET
  3、请求头:
     User-Agent
     Referer:https://passport.lagou.com/login/login.html

第四步:验证
  请求url:https://www.lagou.com/resume/myresume.html
  请求方式:GET
  请求头:User-Agent
  检查返回的页面中是否包含登录用户名,以确认登录成功。

第五步:筛选职位信息
  请求url:https://www.lagou.com/jobs/list_java%E9%AB%98%E7%BA%A7%E5%BC%80%E5%8F%91
  请求方式:GET
  请求头:User-Agent
  请求参数:
     gj:3年及以下
     px:default
     yx:25k-50k
     city:北京
  注:列表页中的职位数据是通过 ajax 加载的,直接 GET 该页面取不到职位数据;代码实现中改为向职位列表的 ajax 接口发送 POST 请求(见代码中的 r6)。

第六步:访问详情页,拿到 X_Anti_Forge_Token、X_Anti_Forge_Code
  请求url:详情页地址
  请求方式:GET
  请求头:User-Agent

第七步:投递简历
  请求url:https://www.lagou.com/mycenterDelay/deliverResumeBeforce.json
  请求方式:POST
  请求头:
     Referer:详情页地址
     User-Agent
     X-Anit-Forge-Code:53165984
     X-Anit-Forge-Token:3b6a2f62-80f0-428b-8efb-ef72fc100d78
     X-Requested-With:XMLHttpRequest
  请求体:
     positionId:职位ID
     type:1
     force:true
二、代码实现
"""Log in to lagou.com, search for positions and deliver a resume to each hit.

The script follows the seven steps described in the flow analysis:
fetch the login page (anti-forgery values), log in, request the service
ticket, verify the login, query the position list, open each detail page
(fresh anti-forgery values) and finally post the resume delivery.
"""
import re
from pprint import pprint  # unused; kept for inspecting r6.json() while debugging
from urllib.parse import urlencode

import requests

# Seconds to wait for any single HTTP request before giving up, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 10

# User-Agent used for the login and search requests (value kept unchanged).
USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36Name"
# User-Agent used for the detail-page and delivery requests (value kept unchanged).
DETAIL_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'

LOGIN_PAGE_URL = "https://passport.lagou.com/login/login.html"

# NOTE(review): credentials are hard-coded in the source; they should be moved
# to configuration or environment variables before this script is shared.
USERNAME = '18611453110'
PASSWORD = '70621c64832c4d4d66a47be6150b4a8e'

SEARCH_KEYWORD = "java高级开发"


def _extract_forge_value(html, name):
    """Return the anti-forgery value ``X_Anti_Forge_<name>`` found in *html*.

    Args:
        html: Page source to search.
        name: Suffix of the variable, ``"Code"`` or ``"Token"``.

    Returns:
        The first quoted value assigned to the variable, as a ``str``.

    Raises:
        ValueError: If the variable does not occur in *html*.
    """
    # The original script looked for "X_Anit_..." on the login page and for
    # "X_Anti_..." on detail pages, with different spacing around "=".
    # Accept both spellings and any spacing so one helper serves both pages.
    pattern = r"X_An(?:ti|it)_Forge_%s\s*=\s*'(.*?)'" % name
    found = re.search(pattern, html, re.S)
    if found is None:
        raise ValueError("X_Anti_Forge_%s not found in page" % name)
    return found.group(1)


session = requests.session()

# Step 1: fetch the login page and read the anti-forgery code/token from it.
r1 = session.get(
    LOGIN_PAGE_URL,
    headers={"User-Agent": USER_AGENT},
    timeout=REQUEST_TIMEOUT,
)
# These must be plain strings: a raw ``re.findall`` result is a list, which
# is not a valid HTTP header value.
X_Anit_Forge_Code = _extract_forge_value(r1.text, "Code")
X_Anit_Forge_Token = _extract_forge_value(r1.text, "Token")

# Step 2: log in. The header names are spelled "Anit" in the captured request
# headers of the flow analysis, so that spelling is kept here.
r2 = session.post(
    "https://passport.lagou.com/login/login.json",
    headers={
        "User-Agent": USER_AGENT,
        "Referer": LOGIN_PAGE_URL,
        "X-Anit-Forge-Code": X_Anit_Forge_Code,
        "X-Anit-Forge-Token": X_Anit_Forge_Token,
        "X-Requested-With": "XMLHttpRequest",
    },
    data={
        "isValidate": True,
        'username': USERNAME,
        'password': PASSWORD,
        'request_form_verifyCode': '',
        'submit': '',
    },
    timeout=REQUEST_TIMEOUT,
)

# Step 3: request the service ticket (authorization) for the logged-in session.
r3 = session.get(
    "https://passport.lagou.com/grantServiceTicket/grant.html",
    headers={
        "User-Agent": USER_AGENT,
        'Referer': LOGIN_PAGE_URL,
    },
    timeout=REQUEST_TIMEOUT,
)

# Step 4: verify the login by checking that the resume page shows the user name.
r4 = session.get(
    'https://www.lagou.com/resume/myresume.html',
    headers={"User-Agent": USER_AGENT},
    timeout=REQUEST_TIMEOUT,
)

print(USERNAME in r4.text)

# Step 5: query positions. A plain GET of the list page (with gj/px/yx/city as
# query parameters) yields no position data because the page loads it through
# an ajax request, so the ajax endpoint is queried directly with the list page
# as Referer.
res = urlencode({"k": SEARCH_KEYWORD}, encoding="utf-8").split("=")[-1]
url = "https://www.lagou.com/jobs/list_" + res
r6 = session.post(
    # NOTE(review): "postionAjax" looks like a typo for "positionAjax";
    # left unchanged here - confirm against the live site.
    'https://www.lagou.com/jobs/postionAjax.json',
    headers={
        'Referer': url,
        "User-Agent": USER_AGENT,
    },
    data={
        "first": True,
        "pn": 1,
        "kd": SEARCH_KEYWORD,
    },
    params={
        "gj": "3年及以下",
        # Was "gx"; the flow analysis and the earlier GET attempt both name
        # this parameter "px".
        "px": "default",
        "yx": "15k-25k",
        "city": "北京",
        "needAddtionResult": False,
        "isSchoolJob": 0,
    },
    timeout=REQUEST_TIMEOUT,
)

positions = r6.json()['content']['positionResult']['result']
for position in positions:
    positionId = position['positionId']
    company_link = 'https://www.lagou.com/jobs/{pos_id}.html'.format(pos_id=positionId)
    companyShortName = position['companyShortName']
    positionName = position['positionName']
    salary = position['salary']
    print('''
    详情连接:%s
    公司名:%s
    职位名:%s
    薪资:%s
    ''' % (company_link, companyShortName, positionName, salary))

    # Step 6: open the detail page to obtain fresh anti-forgery values.
    r7 = session.get(
        company_link,
        headers={'User-Agent': DETAIL_USER_AGENT},
        timeout=REQUEST_TIMEOUT,
    )
    try:
        X_Anti_Forge_Token = _extract_forge_value(r7.text, "Token")
        X_Anti_Forge_Code = _extract_forge_value(r7.text, "Code")
    except ValueError as exc:
        # One unusable detail page should not abort the remaining deliveries.
        print('%s 跳过: %s' % (companyShortName, exc))
        continue

    # Step 7: deliver the resume for this position.
    r8 = session.post(
        'https://www.lagou.com/mycenterDelay/deliverResumeBeforce.json',
        headers={
            'User-Agent': DETAIL_USER_AGENT,
            'Referer': company_link,
            'X-Anit-Forge-Code': X_Anti_Forge_Code,
            'X-Anit-Forge-Token': X_Anti_Forge_Token,
            'X-Requested-With': 'XMLHttpRequest',
        },
        data={
            'positionId': positionId,
            'type': 1,
            'force': True,
        },
        timeout=REQUEST_TIMEOUT,
    )
    # Only claim success when the server accepted the request at HTTP level.
    if r8.ok:
        print('%s 投递成功' % (companyShortName))
    else:
        print('%s 投递失败 (HTTP %s)' % (companyShortName, r8.status_code))