wyy1480
import json
import requests
from requests.exceptions import RequestException
import re
import time
# Progress marker: printed at import time, right after the imports above and
# before any function is defined.
print("0")

def get_hot_movie_rank(url, timeout=10):
    """Download the page at ``url`` and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The response text when the server answers with HTTP 200; ``None`` for
        any other status code or when the request raises a
        ``RequestException`` (connection failure, timeout, ...).
    """
    # Keep the try body to the single call that can raise.
    try:
        response = requests.get(url, timeout=timeout)
    except RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None
# Progress marker: printed at import time once get_hot_movie_rank has been
# defined (not when that function is called).
print("1")

# One <dd>...</dd> entry of the board page. Capture groups, in order:
# rank, poster URL, title, cast line, release line, real-time box office
# (number, unit) and total box office (number, unit).
# Raw strings keep ``\d`` from being treated as a string escape; re.S lets
# ``.`` span the line breaks inside an entry.
_BOARD_ITEM_PATTERN = re.compile(
    r'<dd>.*?board-index.*?>(\d+)</i>.*?data-src="(.*?)".*?name"><a'
    r'.*?>(.*?)</a>.*?star">(.*?)</p>.*?releasetime">(.*?)</p>'
    r'.*?realtime">.*?stonefont">(.*?)</span></span>(.*?)</p>'
    r'.*?total-boxoffice">.*?stonefont">(.*?)</span></span>(.*?)</p>.*?</dd>',
    re.S,
)


def parse_hot_movie_rank(html):
    """Yield one dict per movie entry found in the board page ``html``.

    Args:
        html: Page source as a string. ``None`` or an empty string (for
            example after a failed download) yields nothing instead of
            raising.

    Yields:
        Dicts with the keys ``index``, ``image``, ``title``, ``actor``,
        ``release time``, ``Real-time box office`` and ``Total box office``.
        All values are strings taken verbatim from the markup; the box-office
        numbers are returned exactly as they appear in the page (they may be
        HTML character references such as ``&#xe975;``) and are not decoded.
    """
    if not html:
        return
    for item in _BOARD_ITEM_PATTERN.findall(html):
        yield {
            'index': item[0],
            'image': item[1],
            'title': item[2],
            # Drop the first three characters of the cast line (presumably a
            # label such as "主演:" - confirm against the page markup).
            'actor': item[3].strip()[3:],
            # Drop the first five characters of the release line (presumably
            # a label such as "上映时间:" - confirm against the page markup).
            'release time': item[4].strip()[5:],
            # Number and unit are captured separately; join them back.
            'Real-time box office': (item[5] + item[6]).strip(),
            'Total box office': (item[7] + item[8]).strip(),
        }
# Progress marker: printed at import time once parse_hot_movie_rank has been
# defined (not when that function is called).
print("2")
      
def write_to_file(content, path='maoyan1.txt'):
    """Append ``content`` to ``path`` as a single line of JSON.

    Args:
        content: JSON-serialisable object (the scraper passes one movie dict).
        path: File to append to; defaults to ``maoyan1.txt`` in the current
            working directory. The file is created if it does not exist.

    Each call writes exactly one line, so the file ends up with one JSON
    document per line. ``ensure_ascii=False`` keeps non-ASCII text (e.g.
    Chinese titles) readable instead of ``\\uXXXX`` escapes.
    """
    with open(path, 'a', encoding='utf-8') as f:
        # Terminate every record with a real newline so that consecutive
        # records do not run together on one line.
        f.write(json.dumps(content, ensure_ascii=False) + '\n')
# Progress marker: printed at import time once write_to_file has been defined
# (not when that function is called).
print("3")

def main():
    """Fetch the board page, print every parsed movie and append it to file.

    Downloads ``https://maoyan.com/board/1`` with ``get_hot_movie_rank``,
    parses it with ``parse_hot_movie_rank`` and, for each resulting dict,
    prints it and hands it to ``write_to_file``.
    """
    url = 'https://maoyan.com/board/1'
    html = get_hot_movie_rank(url)
    if html is None:
        # get_hot_movie_rank returns None on a non-200 status or a request
        # error; report it instead of passing None on to the parser.
        print('Failed to fetch ' + url)
        return
    for item in parse_hot_movie_rank(html):
        print(item)
        write_to_file(item)
# Progress marker: printed at import time once main has been defined, before
# the script entry point below runs.
print("4")

# Run the scraper only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
    # Pause for one second after the run.
    # NOTE(review): with a single page fetched this delay has no visible
    # purpose; it looks like a leftover from a multi-page loop - confirm
    # before removing.
    time.sleep(1)
# Progress marker: last statement of the module; when run as a script it is
# printed after main() and the sleep above have finished.
print("5")

0
1
2
3
4
{\'index\': \'1\', \'image\': \'https://p0.meituan.net/moviemachine/f7d2ad70eb79d6d9b8a197713db9b8c41711752.jpg@160w_220h_1e_1c\', \'title\': \'复仇者联盟4:终局之战\', \'actor\': \'小罗伯特·唐尼,克里斯·埃文斯,马克·鲁法洛\', \'release time\': \'2019-04-24\', \'Real-time box office\': \'&#xe975;.&#xea4f;&#xecf7;亿\', \'Total box office\': \'&#xe975;&#xecf7;.&#xf3f0;&#xec2f;亿\'}
{\'index\': \'2\', \'image\': \'https://p1.meituan.net/movie/d28b729ffe72353a72d1e7ef8a9b90591544978.jpg@160w_220h_1e_1c\', \'title\': \'何以为家\', \'actor\': \'赞恩·阿尔·拉菲亚,约丹诺斯·希费罗,博鲁瓦蒂夫·特雷杰·班科尔\', \'release time\': \'2019-04-29\', \'Real-time box office\': \'&#xf6e3;&#xea4f;&#xf3f0;&#xe975;.&#xec1d;万\', \'Total box office\': \'&#xe975;&#xec1d;&#xf3f0;&#xf6e3;.&#xf3f0;万\'}
{\'index\': \'3\', \'image\': \'https://p0.meituan.net/movie/29cebff7d3ed1cf98fbeb6b01c908e1b9947789.jpg@160w_220h_1e_1c\', \'title\': \'雪暴\', \'actor\': \'张震,廖凡,倪妮\', \'release time\': \'2019-04-30\', \'Real-time box office\': \'&#xea4f;&#xe975;&#xe0b0;.&#xe0b0;万\', \'Total box office\': \'&#xea4f;&#xec2f;&#xe343;.&#xf3f0;万\'}
{\'index\': \'4\', \'image\': \'https://p0.meituan.net/moviemachine/409aca94fa1695a6bdb5206735189c11495127.jpg@160w_220h_1e_1c\', \'title\': \'下一任:前任\', \'actor\': \'郭采洁,郑恺,李东学\', \'release time\': \'2019-05-01\', \'Real-time box office\': \'&#xe0b0;&#xf6e3;&#xe343;.&#xecf7;万\', \'Total box office\': \'&#xe0b0;&#xf6e3;&#xec1d;.&#xf3f0;万\'}
{\'index\': \'5\', \'image\': \'https://p1.meituan.net/movie/c63849c7a9de360a7b192bc322792a111705236.jpg@160w_220h_1e_1c\', \'title\': \'反贪风暴4\', \'actor\': \'古天乐,郑嘉颖,林峯\', \'release time\': \'2019-04-04\', \'Real-time box office\': \'&#xf6e3;&#xe975;&#xecf7;.&#xec1d;万\', \'Total box office\': \'&#xe343;.&#xec1d;&#xf3f0;亿\'}
{\'index\': \'6\', \'image\': \'https://p0.meituan.net/moviemachine/90258899534b9cca44f2e9b9a6246504248749.jpg@160w_220h_1e_1c\', \'title\': \'动物出击\', \'actor\': \'景熙童\', \'release time\': \'2019-04-30\', \'Real-time box office\': \'&#xf6e3;&#xf6e3;&#xea4f;.&#xe343;万\', \'Total box office\': \'&#xf6e3;&#xf6e3;&#xecf7;.&#xf3f0;万\'}
{\'index\': \'7\', \'image\': \'https://p0.meituan.net/movie/eda6595dc2c3a5d7cdda5eb4f8d8b1982460902.jpg@160w_220h_1e_1c\', \'title\': \'撞死了一只羊\', \'actor\': \'金巴,更登彭措,索朗旺姆\', \'release time\': \'2019-04-26\', \'Real-time box office\': \'&#xf6e3;&#xf6e3;&#xf3f0;.&#xec1d;万\', \'Total box office\': \'&#xe343;&#xf6e3;&#xec2f;.&#xf3f0;万\'}
{\'index\': \'8\', \'image\': \'https://p0.meituan.net/movie/29caaa1b66c95807a3f4d29b5b03644b1876102.jpg@160w_220h_1e_1c\', \'title\': \'调音师\', \'actor\': \'阿尤斯曼·库拉纳,塔布,拉迪卡·艾普特\', \'release time\': \'2019-04-03\', \'Real-time box office\': \'&#xe343;&#xf47c;.&#xf3f0;万\', \'Total box office\': \'&#xe0b0;.&#xf6e3;&#xea4f;亿\'}
{\'index\': \'9\', \'image\': \'https://p0.meituan.net/movie/0253cac859838e4fd6ae94cf986b07971008254.jpg@160w_220h_1e_1c\', \'title\': \'神奇乐园历险记\', \'actor\': \'索菲亚·玛丽,詹妮弗·加纳,肯·哈德森·坎贝尔\', \'release time\': \'2019-04-19\', \'Real-time box office\': \'&#xe0b0;&#xe343;.&#xf3f0;万\', \'Total box office\': \'&#xe975;&#xe343;&#xe343;&#xf6e3;.&#xf3f0;万\'}
{\'index\': \'10\', \'image\': \'https://p0.meituan.net/movie/86aba43e286ed044a544a75748d08aca3798593.jpg@160w_220h_1e_1c\', \'title\': \'天上再见\', \'actor\': \'纳威尔·佩雷兹·毕斯卡亚特,阿尔贝·杜邦泰尔,艾米莉·德奎恩\', \'release time\': \'2019-04-30\', \'Real-time box office\': \'&#xe975;&#xec2f;.&#xf6e3;万\', \'Total box office\': \'&#xec2f;&#xf3f0;.&#xf3f0;万\'}
5

不明白为什么票房数字都变成了以 &#x 开头的字符编码(应该是网站的一种反爬手段),所以还没有达到我的目标,继续研究中~

记于 2019-08-18:前几天在看别人的爬虫时,看到有人提到了这种反爬技术,所以我接下来要更新这篇文章啦!虽然《复仇者联盟4》(妇联)已经过去很久了,最近比较火的是《哪吒》。

分类:

技术点:

相关文章:

  • 2021-06-29
  • 2021-10-04
  • 2021-07-16
  • 2021-05-08
  • 2021-11-26
  • 2021-10-04
  • 2021-10-04
  • 2021-09-14
猜你喜欢
  • 2021-10-04
  • 2018-11-21
  • 2021-10-04
  • 2021-10-04
  • 2021-09-01
  • 2021-10-04
  • 2018-05-23
相关资源
相似解决方案