【发布时间】:2019-06-02 13:57:12
【问题描述】:
希望在这里得到一些帮助。完整上下文这是我的第一个“有目的的”Python 脚本。在此之前,我只涉足了一点,老实说还在学习,所以也许我在这里跳得太早了。
长话短说,一直在修复各种类型的不匹配或只是一般的缩进问题(亲爱的蟒蛇大人对此并不宽容)。
我想我快完成了,但还有一些最后的问题。他们中的大多数似乎也来自同一部分。这个脚本只是意味着获取一个包含 3 列的 csv 文件,并使用它来基于第一列(iOS 或 Android)发送请求。问题是当我创建要发送的正文时... 这是代码(为了便于发布而省略了一些标记):
#!/usr/bin/python
# -*- coding: utf-8 -*-
import requests
import json
import pandas as pd
from tqdm import tqdm
from datetime import *
import uuid
import warnings
from math import isnan
import time
## throttling based on AF's 80 request per 2 minute rule
def throttle():
i = 0
while i <= 3:
print ("PAUSED FOR THROTTLING!" + "\n" + str(3-i) + " minutes remaining")
time.sleep(60)
i = i + 1
print (i)
return 0
## function for reformating the dates
def date():
d = datetime.utcnow() # # <-- get time in UTC
d = d.isoformat('T') + 'Z'
t = d.split('.')
t = t[0] + 'Z'
return str(t)
## function for dealing with Android requests
def android_request(madv_id,mtime,muuid,android_app,token,endpoint):
headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
params = {'api_token': token }
subject_identities = {
"identity_format": "raw",
"identity_type": "android_advertising_id",
"identity_value": madv_id
}
body = {
'subject_request_id': muuid,
'subject_request_type': 'erasure',
'submitted_time': mtime,
'subject_identities': dict(subject_identities),
'property_id': android_app
}
body = json.dumps(body)
res = requests.request('POST', endpoint, headers=headers,
data=body, params=params)
print("android " + res.text)
## function for dealing with iOS requests
def ios_request(midfa, mtime, muuid, ios_app, token, endpoint):
headers = {'Content-Type': 'application/json',
'Accept': 'application/json'}
params = {'api_token': token}
subject_identities = {
'identity_format': 'raw',
'identity_type': 'ios_advertising_id',
'identity_value': midfa,
}
body = {
'subject_request_id': muuid,
'subject_request_type': 'erasure',
'submitted_time': mtime,
'subject_identities': list(subject_identities),
'property_id': ios_app,
}
body = json.dumps(body)
res = requests.request('POST', endpoint, headers=headers, data=body, params=params)
print("ios " + res.text)
## main run function. Determines whether it is iOS or Android request and sends if not LAT-user
def run(output, mdf, is_test):
# # assigning variables to the columns I need from file
print ('Sending requests! Stand by...')
platform = mdf.platform
device = mdf.device_id
if is_test=="y":
ios = 'id000000000'
android = 'com.tacos.okay'
token = 'OMMITTED_FOR_STACKOVERFLOW_Q'
endpoint = 'https://hq1.appsflyer.com/gdpr/stub'
else:
ios = 'id000000000'
android = 'com.tacos.best'
token = 'OMMITTED_FOR_STACKOVERFLOW_Q'
endpoint = 'https://hq1.appsflyer.com/gdpr/opengdpr_requests'
for position in tqdm(range(len(device))):
if position % 80 == 0 and position != 0:
throttle()
else:
req_id = str(uuid.uuid4())
timestamp = str(date())
if platform[position] == 'android' and device[position] != '':
android_request(device[position], timestamp, req_id, android, token, endpoint)
mdf['subject_request_id'][position] = req_id
if platform[position] == 'ios' and device[position] != '':
ios_request(device[position], timestamp, req_id, ios, token, endpoint)
mdf['subject_request_id'][position] = req_id
if 'LAT' in platform[position]:
mdf['subject_request_id'][position] = 'null'
mdf['error status'][position] = 'Limit Ad Tracking Users Unsupported. Device ID Required'
mdf.to_csv(output, sep=',', index = False, header=True)
# mdf.close()
print ('\nDONE. Please see ' + output
+ ' for the subject_request_id and/or error messages\n')
## takes the CSV given by the user and makes a copy of it for us to use
def read(mname):
orig_csv = pd.read_csv(mname)
mdf = orig_csv.copy()
# Check that both dataframes are actually the same
# print(pd.DataFrame.equals(orig_csv, mdf))
return mdf
## just used to create the renamed file with _LOGS.csv
def rename(mname):
msuffix = '_LOG.csv'
i = mname.split('.')
i = i[0] + msuffix
return i
## adds relevant columns to the log file
def logs_csv(out, df):
mdf = df
mdf['subject_request_id'] = ''
mdf['error status'] = ''
mdf['device_id'].fillna('')
mdf.to_csv(out, sep=',', index=None, header=True)
return mdf
## solely for reading in the file name from the user. creates string out of filename
def readin_name():
mprefix = input('FILE NAME: ')
msuffix = '.csv'
mname = str(mprefix + msuffix)
print ('\n' + 'Reading in file: ' + mname)
return mname
def start():
print ('\nWelcome to GDPR STREAMLINE')
# # blue = OpenFile()
testing = input('Is this a test? (y/n) : ')
# return a CSV
name = readin_name()
import_csv = read(name)
output_name = rename(name)
output_file = logs_csv(output_name, import_csv)
run( output_name, output_file, testing)
# # print ("FILE PATH:" + blue)
## to disable all warnings in console logs
warnings.filterwarnings('ignore')
start()
这是错误堆栈跟踪:
Reading in file: test.csv
Sending requests! Stand by...
0%| | 0/384 [00:00<?, ?it/s]
Traceback (most recent call last):
File "a_GDPR_delete.py", line 199, in <module>
start()
File "a_GDPR_delete.py", line 191, in start
run( output_name, output_file, testing)
File "a_GDPR_delete.py", line 114, in run
android_request(device[position], timestamp, req_id, android, token, endpoint)
File "a_GDPR_delete.py", line 57, in android_request
body = json.dumps(body)
File "/Users/joseph/anaconda3/lib/python3.6/json/__init__.py", line 231, in dumps
return _default_encoder.encode(obj)
File "/Users/joseph/anaconda3/lib/python3.6/json/encoder.py", line 199, in encode
chunks = self.iterencode(o, _one_shot=True)
File "/Users/joseph/anaconda3/lib/python3.6/json/encoder.py", line 257, in iterencode
return _iterencode(o, 0)
File "/Users/joseph/anaconda3/lib/python3.6/json/encoder.py", line 180, in default
o.__class__.__name__)
TypeError: Object of type 'int64' is not JSON serializable
TL;DR: 在带有另一个嵌套 JSON 的 JSON 上调用它时出现 typeError。我已经确认嵌套的 JSON 是问题,因为如果我删除“subject_identities”部分,它会编译并工作......但我使用的 API 需要这些值,所以如果没有该部分,这实际上不会做任何事情。
下面是相关代码(在我第一次使用的版本中,WAS 之前工作过):
def android (madv_id, mtime, muuid):
headers = {
"Content-Type": "application/json",
"Accept": "application/json"
}
params = {
"api_token": "OMMITTED_FOR_STACKOVERFLOW_Q"
}
body = {
"subject_request_id": muuid, #muuid,
"subject_request_type": "erasure",
"submitted_time": mtime,
"subject_identities": [
{ "identity_type": "android_advertising_id",
"identity_value": madv_id,
"identity_format": "raw" }
],
"property_id": "com.tacos.best"
}
body = json.dumps(body)
res = requests.request("POST",
"https://hq1.appsflyer.com/gdpr/opengdpr_requests",
headers=headers, data=body, params=params)
我感觉我已经接近这项工作了。我早期有一个更简单的版本,但我重写了它以使其更具动态性并使用更少的硬编码值(这样我最终可以使用它来应用我正在使用的任何应用程序,而不仅仅是它制作的两个为)。
请客气,我对 python 完全陌生,而且对一般的编码也很生疏(因此尝试做这样的项目)
【问题讨论】:
-
这是一个旁白,但你的
def read(mname):函数有什么意义呢?为什么要创建副本并返回副本?原版不会发生任何事情。 -
无论如何,
device[position]你在这里传递:android_request(device[position], timestamp, req_id, android, token, endpoint)返回一个np.int64对象,json不会识别为 json 可序列化。只需将其转换为 int,所以int(device[position]) -
@juanpa.arrivillaga 对于 read(),据我所见,这个脚本最终编辑了我不想发生的原始 CSV。这是我试图创建一个新副本并且只使用它继续前进。此外,您给出的其他答案也可以,但需要进行一次调整,因为该函数中的值实际上不是整数。我传递的是像“ab12ab12-12ab-34cd-56ef-1234abcd5678”这样的ID,所以我不得不使用android_request(str(device [position]),timestamp,req_id,android,token,endpoint)
-
对
read_csv返回的数据框的修改不会修改 csv 文件。如果发生这种情况,.copy不会阻止它 -
@juanpa.arrivillaga 啊,很高兴知道。我实际上看到了这一点。会不会是我在
to_csv()通话中使用了错误的文件名?
标签: python json pandas python-requests