此服务使用 http 流。它只会在以下端点上打开一个 http 连接:
POST https://collegecrisis.shinyapps.io/dashboard/__sockjs__/n={random_token}/t={token}/w={workerID}/s=0/{random_num}/{random_token2}/xhr_streaming
它将使用以下端点发送命令:
POST https://collegecrisis.shinyapps.io/dashboard/__sockjs__/n={random_token}/t={token}/w={workerID}/s=0/{random_num}/{random_token2}/xhr_send
您可以在 Chrome 开发者工具的“网络”选项卡中搜索 xhr_streaming 来查看这些请求。
token 是从另一个 http 调用中检索到的:
GET https://collegecrisis.shinyapps.io/dashboard/{workerIDFull}__token__
而 workerID 存在于原始页面本身中。
还需要一些名为 singletons(单例)的参数,它们同样位于原始页面的 script 标签中,如下所示:
<script type="application/shiny-singletons">fafb5589cb5a9f24485f3df0511b50d5cd0c7497,603e796bcfc2ab3685167d58c426f64c15a95192</script>
以下脚本:
- 从原始页面中抓取所需的元素
- 使用 workerID 获取令牌
- 在新线程中启动 POST /xhr_streaming
- 在 POST /xhr_send 上发送“打开通道命令”,即
'["0#0|o|"]'
- 使用之前抓取的单例值和大型 JSON 静态配置发送“init 命令”
完整代码:
import requests
from bs4 import BeautifulSoup
import re
import time
from random import choice
from string import ascii_letters,digits
from threading import Thread
from time import sleep
import json
# Bootstrap: load the dashboard page, scrape the values the SockJS URLs and
# the init message need, then fetch the session token.
session = requests.Session()
r = session.get("https://collegecrisis.shinyapps.io/dashboard/")
# Fail here on an HTTP error instead of trying to scrape an error page below.
r.raise_for_status()
soup = BeautifulSoup(r.content, "lxml")
# Comma-separated singleton hashes, sent back verbatim in the init message.
singletons = soup.find("script", {"type": "application/shiny-singletons"}).text
# The <base href> carries the worker path; the worker id follows "_w_".
workerIDFull = soup.find("base")["href"]
# Raw string: "\w" inside a plain literal is an invalid escape sequence.
workerID = re.search(r'_w_(\w+)', workerIDFull).group(1)
# Millisecond timestamp sent as the "_" query parameter of the token request.
timestamp = int(round(time.time() * 1000))
r = session.get(
    f"https://collegecrisis.shinyapps.io/dashboard/{workerIDFull}__token__",
    params={"_": timestamp},
)
r.raise_for_status()
token = r.text
# Random path segments that make up this client's SockJS URLs.
random_token = ''.join(choice(ascii_letters) for _ in range(18))
random_token2 = ''.join(choice(ascii_letters) for _ in range(8))
random_num = ''.join(choice(digits) for _ in range(3))
def getData():
    """Hold the xhr_streaming POST open and print map data from incoming frames.

    Every non-empty line received is printed raw.  Lines that start with
    ``a`` are treated as SockJS array frames: the rest of the line is parsed
    as a JSON list of message strings shaped like ``<id>|<type>|<payload>``.
    When a payload is a JSON object whose ``values`` entry contains
    ``homeMap``, element 8 of ``args`` of the first ``addCircles`` call is
    printed.

    Reads the module-level ``session``, ``random_token``, ``token``,
    ``workerID``, ``random_num`` and ``random_token2``.  Returns ``None``;
    the loop ends when the response stream ends.
    """
    stream_url = (
        "https://collegecrisis.shinyapps.io/dashboard/__sockjs__/"
        f"n={random_token}/t={token}/w={workerID}/s=0/"
        f"{random_num}/{random_token2}/xhr_streaming"
    )
    request = requests.Request("POST", stream_url).prepare()
    # The context manager releases the streamed connection when the loop ends.
    with session.send(request, stream=True) as resp:
        for line in resp.iter_lines():
            if not line:
                continue
            print(line)
            # Only "a[...]" frames carry messages; other lines are skipped.
            if not line.startswith(b"a"):
                continue
            # Parsing the frame as JSON undoes the string escaping without
            # corrupting non-ASCII text and handles several messages per frame.
            for message in json.loads(line[1:].decode("utf-8")):
                if not isinstance(message, str):
                    continue
                # maxsplit=2 keeps any "|" inside the payload intact.
                parts = message.split("|", 2)
                if len(parts) < 3 or not parts[2]:
                    continue
                data = json.loads(parts[2])
                values = data.get("values") if isinstance(data, dict) else None
                # Updates without a homeMap entry are ignored, not fatal.
                home_map = values.get("homeMap") if isinstance(values, dict) else None
                if not home_map:
                    continue
                circles = [
                    call["args"][8]
                    for call in home_map["x"]["calls"]
                    if call["method"] == "addCircles"
                ]
                if circles:
                    print(circles[0])
def openChannel():
    """POST the open-channel command ``["0#0|o|"]`` to the xhr_send endpoint.

    Uses the module-level ``session`` and the same URL path components as
    the streaming request.  Returns ``None``.
    """
    send_url = (
        "https://collegecrisis.shinyapps.io/dashboard/__sockjs__/"
        f"n={random_token}/t={token}/w={workerID}/s=0/"
        f"{random_num}/{random_token2}/xhr_send"
    )
    session.post(
        send_url,
        data='["0#0|o|"]',
        headers={"Content-Type": "text/plain;charset=UTF-8"},
    )
def sendInit():
    """POST the Shiny ``init`` message to the xhr_send endpoint.

    The message is a JSON object with ``method`` set to ``"init"`` and a
    ``data`` dict of static client state, including the scraped
    ``singletons`` value.  It is sent as the payload of a ``1#0|m|`` message
    inside a one-element JSON array.  Returns ``None``.
    """
    # Outputs reported with "_hidden": False, in the order they are sent.
    shown_outputs = (
        "authModal", "homefullOnlineVB", "homepOnlineVB", "homeHybridVB",
        "homepPersonVB", "homePersonVB", "homeTBDVB", "homeOtherVB",
        "homeTotalShownVB", "homeMap",
    )
    # Outputs reported with "_hidden": True, in the order they are sent.
    hidden_outputs = (
        "graphStateFilter", "fallBarGraph", "covidAthleticGraph",
        "schoolCovidPlot", "intlFilter", "intlGraph", "facultyBarGraph",
        "stateTrendsGraph", "covidHeatmap", "announceHeatmap",
        "onlineHeatmap", "springBreak", "peerInstPicker", "statusFilter",
        "rankcatFilter", "hospitalFilter", "covidFilter", "campusTypeFilter",
        "sectorFilter", "ccbasicFilter", "divisionFilter", "conferenceFilter",
        "sizeSlider", "resHallSlider", "sportsRevenueSlider", "intlSlider",
        "onlineVB", "announcedVB", "noDecisionVB", "totalVB", "dateSlider",
        "springMap", "fallPeerInstPicker", "fallStatusFilter",
        "fallRankcatFilter", "fallFacultyFilter", "fallHospitalFilter",
        "fallCovidFilter", "fallCampusTypeFilter", "fallSectorFilter",
        "fallCcbasicFilter", "fallStaffFilter", "fallDivisionFilter",
        "fallConferenceFilter", "fallSizeSlider", "fallResHallSlider",
        "fallSportsRevenueSlider", "fallIntlSlider", "fallfullOnlineVB",
        "fallpOnlineVB", "fallHybridVB", "fallpPersonVB", "fallPersonVB",
        "fallTBDVB", "fallOtherVB", "fallTotalShownVB", "fallMap",
        "greFilter", "modelFilter", "planFilter", "videoPlatformFilter",
        "lawSectorFilter", "lawMinoritySlider", "lawLSATtwofiveSlider",
        "lawLSATmedianSlider", "lawLSATsevenfiveSlider",
        "lawAcceptanceSlider", "lawFYSlider", "lawFullOnlineVB",
        "lawPartialOnlineVB", "lawHybridVB", "lawPersonVB", "lawNDVB",
        "lawTotalVB", "lawMap", "intlOnlineVB", "intlHybridVB",
        "intlInPersonVB", "intlCovidVB", "intlTBDVB", "intlTotalVB",
        "intlMap",
    )

    # Insertion order below fixes the key order of the serialized message.
    client_data = {
        "sidebarItemExpanded": None,
        "sidebarCollapsed": True,
        "resetAll:shiny.action": 0,
        "fallResetAll:shiny.action": 0,
        "lawResetAll:shiny.action": 0,
    }
    for output_name in shown_outputs:
        client_data[f".clientdata_output_{output_name}_hidden"] = False
    for output_name in hidden_outputs:
        client_data[f".clientdata_output_{output_name}_hidden"] = True
    client_data.update({
        ".clientdata_pixelratio": 1,
        ".clientdata_url_protocol": "https:",
        ".clientdata_url_hostname": "collegecrisis.shinyapps.io",
        ".clientdata_url_port": "",
        ".clientdata_url_pathname": "/dashboard/",
        ".clientdata_url_search": "",
        ".clientdata_url_hash_initial": "",
        ".clientdata_url_hash": "",
        ".clientdata_singletons": singletons,
        ".clientdata_allowDataUriScheme": True,
    })

    init_message = json.dumps({"method": "init", "data": client_data})
    # Serializing a one-element list escapes the inner JSON text as a string.
    body = json.dumps(["1#0|m|" + init_message])
    send_url = (
        "https://collegecrisis.shinyapps.io/dashboard/__sockjs__/"
        f"n={random_token}/t={token}/w={workerID}/s=0/"
        f"{random_num}/{random_token2}/xhr_send"
    )
    session.post(
        send_url,
        data=body,
        headers={"Content-Type": "text/plain;charset=UTF-8"},
    )
# Start the streaming listener in a background thread before sending anything.
thread = Thread(target=getData)
thread.start()
# Presumably gives the streaming POST time to connect before the commands go out.
sleep(1)
# Commands go over xhr_send: first open the channel, then send the init message.
openChannel()
sendInit()
# Wait for the listener thread to finish.
thread.join()
如果您需要地图中的更多数据,请查看字段 data["values"]["homeMap"]["x"]["calls"]
run this on repl.it