试试这个以获取您希望从这些链接中获取的任何字段。目前它获取了标题。您可以修改res.json()['data'][0]['attributes']['name'] 以获取您感兴趣的任何领域。 Mkae 确保将网址放在此列表中urls_to_scrape。
import json
import requests
from bs4 import BeautifulSoup
from urllib.parse import unquote
urls_to_scrape = {
'https://apps.apple.com/us/app/bingo-story-live-bingo-games/id1179108009?uo=4',
'https://apps.apple.com/us/app/jackpot-boom-casino-slots/id1554995201?uo=4'
}
base_url = 'https://apps.apple.com/us/app/bingo-story-live-bingo-games/id1179108009?uo=4'
link = 'https://amp-api.apps.apple.com/v1/catalog/US/apps/{}'
params = {
'platform': 'web',
'additionalPlatforms': 'appletv,ipad,iphone,mac',
'extend': 'customPromotionalText,customScreenshotsByType,description,developerInfo,distributionKind,editorialVideo,fileSizeByDevice,messagesScreenshots,privacy,privacyPolicyText,privacyPolicyUrl,requirementsByDeviceFamily,supportURLForLanguage,versionHistory,websiteUrl',
'include': 'genres,developer,reviews,merchandised-in-apps,customers-also-bought-apps,developer-other-apps,app-bundles,top-in-apps,related-editorial-items',
'l': 'en-us',
'limit[merchandised-in-apps]': '20',
'omit[resource]': 'autos',
'sparseLimit[apps:related-editorial-items]': '5'
}
with requests.Session() as s:
s.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36'
res = s.get(base_url)
soup = BeautifulSoup(res.text,"lxml")
token_raw = soup.select_one("[name='web-experience-app/config/environment']").get("content")
token = json.loads(unquote(token_raw))['MEDIA_API']['token']
s.headers['Accept'] = 'application/json'
s.headers['Referer'] = 'https://apps.apple.com/'
s.headers['Authorization'] = f'Bearer {token}'
for url in urls_to_scrape:
id_ = url.split("/")[-1].strip("id").split("?")[0]
res = s.get(link.format(id_),params=params)
title = res.json()['data'][0]['attributes']['name']
print(title)