【发布时间】:2017-05-22 14:27:16
【问题描述】:
import os
import json
import csv
import re
subdir = "./json_files/" #'/home/varun/Desktop/pyfile'
def jsontocsv():
with open ('test.csv', 'w') as outfile:
fieldnames = ['name', 'private', 'version', 'dependencies', 'scripts', 'devDependencies']
writer = csv.DictWriter(outfile, fieldnames=fieldnames)
writer.writeheader()
for file in os.listdir(subdir):
file_path = os.path.join(subdir, file)
with open(file_path, 'r') as json_file:
parsed_json = json.load(json_file)
with open ('test.csv', 'a') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(parsed_json.values())
def cleanUnicode():
with open ('data.csv', 'w') as outfile:
fieldnames = ['name', 'private', 'version', 'dependencies', 'scripts', 'devDependencies']
writer = csv.DictWriter(outfile, fieldnames=fieldnames)
writer.writeheader()
with open('test.csv', 'r') as csvfile:
reader = csv.DictReader(csvfile, delimiter=',')
rows = list(reader)
for row in rows[1:]:
row = str(row)
row = re.sub(r'u', r'', row)
print(row)
# with open ('data.csv', 'a') as csvfile:
# fieldnames = ['name', 'private', 'version', 'dependencies', 'scripts', 'devDependencies']
# writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
# writer.writerow(row)
# os.remove('test.csv')
if __name__ == '__main__':
jsontocsv()
cleanUnicode()
print("Scripts finished running all json files parsed to csv")
我正在从多个 json 文件读取到单个 csv 文件中,在单个 csv 文件中获取数据,但每个嵌套值都有 'u。如何删除这些并仅保留我想要的数据?
示例输入:
{
"version": "0.1.0",
"devDependencies": {
"react-scripts": "0.6.1"
},
"dependencies": {
"crossfilter": "^1.3.12",
"d3": "^4.2.6",
"d3-scale": "^1.0.3",
"dc": "^2.0.0-beta.32",
"immutable": "^3.8.1",
"jszip": "^3.1.2",
"react": "^15.3.2",
"react-addons-transition-group": "^15.3.2",
"react-dom": "^15.3.2",
"shifty": "^1.5.2",
"wolfy87-eventemitter": "^5.1.0"
},
"scripts": {
"start": "react-scripts start",
"build": "react-scripts build",
"test": "react-scripts test --env=jsdom",
"eject": "react-scripts eject"
}
}
输出:
version,dependencies,scripts,devDependencies
0.1.0,"{u'wolfy87-eventemitter': u'^5.1.0', u'shifty': u'^1.5.2', u'react-addons-transition-group': u'^15.3.2', u'react-dom': u'^15.3.2', u'dc': u'^2.0.0-beta.32', u'ccbooleananalysis': u'^1.0.0', u'react': u'^15.3.2', u'jszip': u'^3.1.2', u'crossfilter': u'^1.3.12', u'ccnetviz': u'^1.0.8', u'immutable': u'^3.8.1', u'd3': u'^4.2.6', u'd3-scale': u'^1.0.3'}","{u'test': u'react-scripts test --env=jsdom', u'start': u'react-scripts start', u'build': u'react-scripts build', u'eject': u'react-scripts eject'}",{u'react-scripts': u'0.6.1'}
希望你们都被替换
【问题讨论】:
-
您确定
u是实际上在数据中吗? Python unicode 字符串在前面用u表示自己。这并不意味着u实际上就在数据本身中——这只是 python 在打印时告诉你它是一个 unicode 值的方式。 -
我在 Excel 表中打开了数据,其中包含 'u。也试过把网上的csv转json看看,json现在有unicode "devDependencies": "{u'react-scripts': u'0.6.1'}"
-
任何作为对象的值都有 unicode
-
在 Python 2 中,csv 模块不能很好地处理 Unicode,因此您需要对其进行显式解码和编码。并且你需要以二进制模式打开csv文件。
-
显示示例输入、错误输出和所需输出。
标签: python python-2.7 csv unicode