haiyan123

一、流程分析

第一步:获取登录页,获取X_Anti_Forge_Token,X_Anti_Forge_Code
    1、请求url:https://passport.lagou.com/login/login.html
    2、请求方式:get
    3、请求头:
           - cookie:用session处理了
           - User-Agent:Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36
第二步:登录
    1、请求url:https://passport.lagou.com/login/login.json
    2、请求方式:post
    3、请求头:
        cookie
        User-agent
        Referer:https://passport.lagou.com/login/login.html
        X-Anit-Forge-Code:53165984
        X-Anit-Forge-Token:3b6a2f62-80f0-428b-8efb-ef72fc100d78
        X-Requested-With:XMLHttpRequest
    4、请求体
        isValidate:true
        username:18611453110
        password:70621c64832c4d4d66a47be6150b4a8e
        request_form_verifyCode:''
        submit:''
第三步:授权
        1、请求url:https://passport.lagou.com/grantServiceTicket/grant.html
        2、请求方法:GET
        3、请求头:
           User-agent
           Referer:https://passport.lagou.com/login/login.html
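第四步:验证
    1、请求url:https://www.lagou.com/resume/myresume.html
    2、请求方式:GET
    3、请求头:
        User-agent
    4、判断响应内容中是否包含登录用户名,包含则说明登录成功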
第五步:筛选职位信息
    请求url:https://www.lagou.com/jobs/list_java%E9%AB%98%E7%BA%A7%E5%BC%80%E5%8F%91
    请求方法:GET
    请求头:
        User-Agent
    请求参数:
        gj:3年及以下
        px:default
        yx:25k-50k
        city:北京
    注:按上述方式请求列表页拿不到职位数据,因为页面是通过ajax(POST)请求获取数据的,所以代码实现中改为直接请求该ajax接口
第六步:访问详情页,拿到X_Anti_Forge_Token,X_Anti_Forge_Code
    请求url:详情页地址
    请求方式:GET
    请求头:User-Agent
第七步:投递简历
    请求url:https://www.lagou.com/mycenterDelay/deliverResumeBeforce.json
    请求方式:POST
    请求头:
        Referer:详情页地址
        User-agent
        X-Anit-Forge-Code:53165984
        X-Anit-Forge-Token:3b6a2f62-80f0-428b-8efb-ef72fc100d78
        X-Requested-With:XMLHttpRequest
    请求体:
        positionId:职位ID
        type:1
        force:true

二、代码实现

  1 import requests
  2 import re
  3 from urllib.parse import urlencode
  4 session = requests.session()
  5 r1 = session.get(
  6     "https://passport.lagou.com/login/login.html",
  7     headers = {
  8         "User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36Name",
  9     }
 10 )
 11 X_Anit_Forge_Code  = re.findall("X_Anit_Forge_Code =\'(.*?)\'",r1.text,re.S)
 12 X_Anit_Forge_Token = re.findall("X_Anit_Forge_Token =\'(.*?)\'",r1.text,re.S)
 13 r2 = session.post(
 14     "https://passport.lagou.com/login/login.json",
 15     headers = {
 16         "User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36Name",
 17         "Referer":"https://passport.lagou.com/login/login.html",
 18         "X-Anit-Forge-Code":X_Anit_Forge_Code,
 19         "X-Anit-Forge-Token":X_Anit_Forge_Token,
 20         "X-Requested-With":"XMLHttpRequest"
 21     },
 22     data={
 23         "isValidate": True,
 24         \'username\': \'18611453110\',
 25         \'password\': \'70621c64832c4d4d66a47be6150b4a8e\',
 26         \'request_form_verifyCode\': \'\',
 27         \'submit\': \'\'
 28     }
 29 )
 30 r3 = session.get(
 31     "https://passport.lagou.com/grantServiceTicket/grant.html",
 32     headers = {
 33         "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36Name",
 34         \'Referer\': \'https://passport.lagou.com/login/login.html\',
 35     }
 36 )
 37 r4 = session.get(
 38     \'https://www.lagou.com/resume/myresume.html\',
 39     headers = {
 40         "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36Name",
 41     }
 42 )
 43 
 44 print(\'18611453110\' in r4.text)
 45 
 46 # ============================
 47 # res = urlencode({"k":"java高级开发"},encoding="utf-8").split("=")[-1]
 48 # url = "https://www.lagou.com/jobs/list_"+res
 49 # r5 =session.get(url,
 50 #             headers={
 51 #                     "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36Name",
 52 #                 },
 53 #              params={
 54 #                      \'gj\': \'3年及以下\',
 55 #                      \'px\': \'default\',
 56 #                      \'yx\': \'25k-50k\',
 57 #                      \'city\': \'北京\'
 58 #                 }
 59 #          ) #按照套路进行,结果取不到值,因为人家发的是ajax请求获取的数据,所以选择了r6的方式
 60 res = urlencode({"k":"java高级开发"},encoding="utf-8").split("=")[-1]
 61 url = "https://www.lagou.com/jobs/list_"+res
 62 r6 = session.post(
 63     \'https://www.lagou.com/jobs/postionAjax.json\',
 64     headers = {
 65         \'Referer\': url,
 66         "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36Name",
 67     },
 68     data = {
 69         "first":True,
 70         "pn":1,
 71         "kd":"java高级开发"
 72     },
 73     params = {
 74         "gj":"3年及以下",
 75         "gx":"default",
 76         "yx":"15k-25k",
 77         "city":"北京",
 78         "needAddtionResult":False,
 79         "isSchoolJob":0
 80     }
 81 )
 82 from pprint import pprint
 83 # print(r6.json())
 84 comapines_list=r6.json()[\'content\'][\'positionResult\'][\'result\']
 85 for comapiny in comapines_list:
 86     positionId=comapiny[\'positionId\']
 87     company_link=\'https://www.lagou.com/jobs/{pos_id}.html\'.format(pos_id=positionId)
 88     companyShortName = comapiny[\'companyShortName\']
 89     positionName = comapiny[\'positionName\']
 90     salary = comapiny[\'salary\']
 91     print(\'\'\'
 92     详情连接:%s
 93     公司名:%s
 94     职位名:%s
 95     薪资:%s
 96     \'\'\' %(company_link,companyShortName,positionName,salary))
 97     r7=session.get(company_link,
 98                 headers={
 99                     \'User-Agent\': \'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36\',
100                 }
101                 )
102     X_Anti_Forge_Token = re.findall("X_Anti_Forge_Token = \'(.*?)\'", r7.text, re.S)[0]
103     X_Anti_Forge_Code = re.findall("X_Anti_Forge_Code = \'(.*?)\'", r7.text, re.S)[0]
104     # print(X_Anti_Forge_Token,X_Anti_Forge_Code)
105 
106 
107     session.post(\'https://www.lagou.com/mycenterDelay/deliverResumeBeforce.json\',
108                  headers={
109                      \'User-Agent\': \'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36\',
110                      \'Referer\': company_link,
111                      \'X-Anit-Forge-Code\': X_Anti_Forge_Code,
112                      \'X-Anit-Forge-Token\': X_Anti_Forge_Token,
113                      \'X-Requested-With\': \'XMLHttpRequest\'
114                  },
115                  data={
116     \'positionId\':positionId,
117     \'type\':1,
118     \'force\':True
119                  }
120                  )
121     print(\'%s 投递成功\' %(companyShortName))
View Code

 

分类:

技术点:

相关文章: