【问题标题】:Error while scraping: "Expecting value: line 1 column 1 (char 0)" — 抓取时出错:“期望值:第 1 行第 1 列(字符 0)”
【发布时间】:2021-09-25 11:21:22
【问题描述】:

我正在使用以下代码从烂番茄网站上抓取评论:

页面链接(即下面代码中请求的地址):https://www.rottentomatoes.com/m/avatar/reviews?type=user

import requests
import re
import json
import pandas as pd
import numpy as np

# Download the user-review page; the regex below expects its source to contain
# a JavaScript assignment of the form `movieReview = <json>;`.
r = requests.get("https://www.rottentomatoes.com/m/avatar/reviews?type=user")
# Raw string: "\s" in a normal literal is an invalid escape sequence and
# triggers a warning on recent Python versions.
match = re.search(r'movieReview\s=\s(.*);', r.text)
if match is None:
    # Without this guard the failure surfaces as the opaque
    # "'NoneType' object has no attribute 'group'".
    raise ValueError("movieReview assignment not found in the page source")
content = json.loads(match.group(1))

# Module-level id that getReviews() interpolates into the API URL.
movieId = content["movieId"]

def getReviews(endCursor):
    """Fetch one page of user reviews and return the decoded JSON body.

    ``endCursor`` is sent as the ``endCursor`` query parameter; the movie is
    identified by the module-level ``movieId``.
    """
    endpoint = f"https://www.rottentomatoes.com/napi/movie/{movieId}/reviews/user"
    query = {"direction": "next", "endCursor": endCursor, "startCursor": ""}
    return requests.get(endpoint, params=query).json()

# Column-oriented accumulator for the scraped reviews.
data = {"User_Name": [], "Rating": [], "Review": []}
result = {}

# Fetch five pages; every page after the first is requested with the
# endCursor reported by the previous response.
for i in range(5):
    cursor = result["pageInfo"]["endCursor"] if i != 0 else ""
    result = getReviews(cursor)
    reviews = result["reviews"]
    data["User_Name"].extend(t["displayName"] for t in reviews)
    data["Rating"].extend(t["score"] for t in reviews)
    data["Review"].extend(t["review"] for t in reviews)

# Build the frame once, after all pages are collected, instead of rebuilding
# it on every iteration.
df = pd.DataFrame(data)

我想把上面的代码转换成一个单独的函数。

下面是我尝试把它封装成函数后的代码,但运行时 json.loads() 报错:
“Expecting value: line 1 column 1 (char 0)”(预期值:第 1 行第 1 列,字符 0)

我在谷歌上搜索过这个错误,有人说添加 headers 参数可以解决,但在我这里不起作用。

我无法理解是什么导致了这个错误。如果有人可以指导我会很有帮助。

import requests
import re
import json
import pandas as pd
import numpy as np

def getReviews(movieId, endCursor):
    """Request one page of user reviews for ``movieId`` and return the parsed JSON.

    ``endCursor`` is forwarded as the ``endCursor`` query parameter.
    """
    # NOTE(review): this path is "/napi/{movieId}/..." whereas the standalone
    # script earlier in this post requests "/napi/movie/{movieId}/...". The
    # missing "movie/" segment is presumably why the response is not JSON and
    # r.json() fails with "Expecting value: line 1 column 1 (char 0)".
    r = requests.get(f"https://www.rottentomatoes.com/napi/{movieId}/reviews/user",
    params = {
        "direction": "next",
        "endCursor": endCursor,
        "startCursor": ""
        },
    headers={'Content-Type': 'application/json'}
    )
    return r.json()

def ScrapeReviews(movie):
    """Collect five pages of user reviews for ``movie`` into a DataFrame.

    ``movie`` is the URL slug placed after ``/m/``. The returned frame has the
    columns ``User_Name``, ``Rating`` and ``Review``.
    """
    page = requests.get("https://www.rottentomatoes.com/m/" + movie + "/reviews?type=user")
    embedded = re.search('movieReview\s=\s(.*);', page.text)
    movie_id = json.loads(embedded.group(1))["movieId"]

    columns = {"User_Name": [], "Rating": [], "Review": []}
    # (output column, key in each review object), in the order they are filled.
    field_for = (("User_Name", "displayName"), ("Rating", "score"), ("Review", "review"))
    payload = {}

    for page_no in range(5):
        # The first request uses an empty cursor; later ones continue from
        # the cursor reported by the previous page.
        cursor = "" if page_no == 0 else payload["pageInfo"]["endCursor"]
        payload = getReviews(movie_id, cursor)
        for column, key in field_for:
            columns[column].extend(entry[key] for entry in payload["reviews"])
        frame = pd.DataFrame(columns)

    return frame
# Example run for the movie slug 'avatar' (inserted after "/m/" in the page URL).
d = ScrapeReviews('avatar')

【问题讨论】:

    标签: python json web-scraping rotten-tomatoes


    【解决方案1】:

    错误出在 getReviews 函数中:URL 缺少 movie/ 这一段,应该是:

    "https://www.rottentomatoes.com/napi/movie/{movieId}/reviews/user"
    

    import requests
    import re
    import json
    import pandas as pd
    import numpy as np
    
    
    def getReviews(movieId, endCursor, timeout=10):
        """Fetch one page of user reviews for a movie.

        Args:
            movieId: Identifier interpolated into the ``/napi/movie/<id>/``
                path.
            endCursor: Pagination cursor sent as the ``endCursor`` query
                parameter.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The response body decoded from JSON.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        r = requests.get(
            f"https://www.rottentomatoes.com/napi/movie/{movieId}/reviews/user",
            params={"direction": "next", "endCursor": endCursor, "startCursor": ""},
            headers={"Content-Type": "application/json"},
            # requests does not time out by default, so a stalled connection
            # would otherwise hang the scraper indefinitely.
            timeout=timeout,
        )
        # Fail on error statuses here rather than letting r.json() choke on a
        # non-JSON error body with "Expecting value: line 1 column 1 (char 0)".
        r.raise_for_status()
        return r.json()
    
    
    def ScrapeReviews(movie, pages=5):
        """Scrape user reviews for a movie into a DataFrame.

        Args:
            movie: URL slug of the movie, i.e. the part after ``/m/``
                (for example ``"avatar"``).
            pages: Maximum number of review pages to request. Defaults to 5,
                the number that was previously hard-coded.

        Returns:
            A ``pandas.DataFrame`` with the columns ``User_Name``, ``Rating``
            and ``Review``, one row per review.

        Raises:
            requests.HTTPError: If the review page cannot be downloaded.
            ValueError: If the page does not contain the ``movieReview``
                assignment the movie id is read from.
        """
        url = "https://www.rottentomatoes.com/m/" + movie + "/reviews?type=user"
        req = requests.get(url, timeout=10)
        req.raise_for_status()

        # Raw string: "\s" inside a normal literal is an invalid escape
        # sequence and triggers a warning on recent Python versions.
        match = re.search(r"movieReview\s=\s(.*);", req.text)
        if match is None:
            raise ValueError(f"movieReview data not found on {url}")
        movie_id = json.loads(match.group(1))["movieId"]

        data = {"User_Name": [], "Rating": [], "Review": []}
        cursor = ""  # an empty cursor requests the first page

        for _ in range(pages):
            result = getReviews(movie_id, cursor)
            reviews = result["reviews"]
            data["User_Name"].extend(t["displayName"] for t in reviews)
            data["Rating"].extend(t["score"] for t in reviews)
            data["Review"].extend(t["review"] for t in reviews)

            # Stop early when the response carries no cursor to continue from,
            # instead of failing with KeyError or re-requesting the first page.
            # NOTE(review): assumes the API omits or empties endCursor on the
            # last page -- confirm against a movie with few reviews.
            cursor = (result.get("pageInfo") or {}).get("endCursor")
            if not cursor:
                break

        return pd.DataFrame(data)
    
    
    # Example run: scrape the "avatar" reviews and print the resulting DataFrame.
    d = ScrapeReviews("avatar")
    print(d)
    

    打印:

            User_Name  Rating                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      Review
    0           Joe D     5.0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    To me this is the most perfect blockbuster of all.\nLove Sam Worthington's empty cup, I find his everyman acting compelling, Saldana may be the most beautiful woman on the planet with her trademark perfect posture, and Sigourney adds class with extra to spare wherever she goes.\nThe planet Pandora remains the real star, and the revelation that we're the bad guys and the spiritual tree-huggers were right all along, I find genuinely touching every time.\nFirst class and I can't wait for more of Cameron's magic touch.
    1         Jimmy W     1.0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        The fact that this movie can make the most money of all time and also gain a following of hive-minded morons to defend it says more about the state of society than it does the movie itself. For a movie that's meant to make a point about abusive use of the environment, they sure seem to indulge in the use of massive amounts of expensive technology that no doubt utilized way more than its fair share of natural resources. Oh well, at least you can pretend to be vindicated by the box office numbers.
    2      Goudkuil E     1.5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                Apart from the visuals everything feels uninspired and thrown together using a old cliche of an outsider seeing what's wrong with what he's people have been doing falling in love then whiching sides. The acting is ok, the dialogue is kinda rough. The movie is padded with a lot of nice scenique views with no real narrative meaning.
    3       Antonio D     4.0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               o filme possui uma fotografia muito bela e, mesmo o filme sendo de 2009 não conseguimos encontrar defeitos em relação a montagem e fotografia, a história é satisfatória e é um reflexo do que sabemos que aconteceu no inicio da colonização
    
    ...and so on.
    

    【讨论】:

      猜你喜欢
      • 2021-05-02
      • 2013-05-10
      • 2019-02-25
      • 2018-08-28
      • 2023-03-25
      • 2018-06-28
      • 2020-11-11
      • 2022-11-15
      相关资源
      最近更新 更多