【发布时间】:2022-01-16 22:14:21
【问题描述】:
我是使用 GCP 功能/产品的初学者。 我在下面编写了以下代码,它从本地文件夹中获取城市列表,并调用该列表中每个城市的天气数据,最终将这些天气值上传到 BigQuery 中的表中。我不再需要更改代码,因为它会在新的一周开始时创建新表,现在我想在云中“部署”(我什至不确定这是否称为部署代码)自动运行在那里。我尝试使用 App Engine 和 Cloud Functions,但在这两个地方都遇到了问题。
import requests, json, sqlite3, os, csv, datetime, re
from google.cloud import bigquery
#from google.cloud import storage
list_city = []
with open("list_of_cities.txt", "r") as pointer:
for line in pointer:
list_city.append(line.strip())
API_key = "PLACEHOLDER"
Base_URL = "http://api.weatherapi.com/v1/history.json?key="
yday = datetime.date.today() - datetime.timedelta(days = 1)
Date = yday.strftime("%Y-%m-%d")
table_id = f"sonic-cat-315013.weather_data.Historical_Weather_{yday.isocalendar()[0]}_{yday.isocalendar()[1]}"
credentials_path = r"PATH_TO_JSON_FILE"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
client = bigquery.Client()
try:
schema = [
bigquery.SchemaField("city", "STRING", mode="REQUIRED"),
bigquery.SchemaField("Date", "Date", mode="REQUIRED"),
bigquery.SchemaField("Hour", "INTEGER", mode="REQUIRED"),
bigquery.SchemaField("Temperature", "FLOAT", mode="REQUIRED"),
bigquery.SchemaField("Humidity", "FLOAT", mode="REQUIRED"),
bigquery.SchemaField("Condition", "STRING", mode="REQUIRED"),
bigquery.SchemaField("Chance_of_rain", "FLOAT", mode="REQUIRED"),
bigquery.SchemaField("Precipitation_mm", "FLOAT", mode="REQUIRED"),
bigquery.SchemaField("Cloud_coverage", "INTEGER", mode="REQUIRED"),
bigquery.SchemaField("Visibility_km", "FLOAT", mode="REQUIRED")
]
table = bigquery.Table(table_id, schema=schema)
table.time_partitioning = bigquery.TimePartitioning(
type_=bigquery.TimePartitioningType.DAY,
field="Date", # name of column to use for partitioning
)
table = client.create_table(table) # Make an API request.
print(
"Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
)
except:
print("Table {}_{} already exists".format(yday.isocalendar()[0], yday.isocalendar()[1]))
def get_weather():
try:
x["location"]
except:
print(f"API could not call city {city_name}")
global day, time, dailytemp, dailyhum, dailycond, chance_rain, Precipitation, Cloud_coverage, Visibility_km
day = []
time = []
dailytemp = []
dailyhum = []
dailycond = []
chance_rain = []
Precipitation = []
Cloud_coverage = []
Visibility_km = []
for i in range(24):
dayval = re.search("^\S*\s" ,x["forecast"]["forecastday"][0]["hour"][i]["time"])
timeval = re.search("\s(.*)" ,x["forecast"]["forecastday"][0]["hour"][i]["time"])
day.append(dayval.group()[:-1])
time.append(timeval.group()[1:])
dailytemp.append(x["forecast"]["forecastday"][0]["hour"][i]["temp_c"])
dailyhum.append(x["forecast"]["forecastday"][0]["hour"][i]["humidity"])
dailycond.append(x["forecast"]["forecastday"][0]["hour"][i]["condition"]["text"])
chance_rain.append(x["forecast"]["forecastday"][0]["hour"][i]["chance_of_rain"])
Precipitation.append(x["forecast"]["forecastday"][0]["hour"][i]["precip_mm"])
Cloud_coverage.append(x["forecast"]["forecastday"][0]["hour"][i]["cloud"])
Visibility_km.append(x["forecast"]["forecastday"][0]["hour"][i]["vis_km"])
for i in range(len(time)):
time[i] = int(time[i][:2])
def main():
i = 0
while i < len(list_city):
try:
global city_name
city_name = list_city[i]
complete_URL = Base_URL + API_key + "&q=" + city_name + "&dt=" + Date
response = requests.get(complete_URL, timeout = 10)
global x
x = response.json()
get_weather()
table = client.get_table(table_id)
varlist = []
for j in range(24):
variables = city_name, day[j], time[j], dailytemp[j], dailyhum[j], dailycond[j], chance_rain[j], Precipitation[j], Cloud_coverage[j], Visibility_km[j]
varlist.append(variables)
client.insert_rows(table, varlist)
print(f"City {city_name}, ({i+1} out of {len(list_city)}) successfully inserted")
i += 1
except Exception as e:
print(e)
continue
在代码中,直接引用了位于本地的两个文件,一个是城市列表,另一个是包含在 GCP 中访问我的项目的凭据的 JSON 文件。我相信将这些文件上传到 Cloud Storage 并在那里引用它们不会有问题,但后来我意识到如果不使用凭据文件,我实际上无法访问 Cloud Storage 中的存储桶。
这导致我不确定整个过程是否可行,如果我需要首先在本地引用它,我如何首先从云端进行身份验证?似乎是一个无限循环,我将从 Cloud Storage 中的文件进行身份验证,但我需要先进行身份验证才能访问该文件。
我真的很感谢这里的一些帮助,我不知道从哪里开始,而且我对 SE/CS 也没有很好的了解,我只知道 Python R 和 SQL。
【问题讨论】:
标签: authentication google-app-engine google-cloud-platform automation google-cloud-functions