【发布时间】:2021-09-27 09:54:12
【问题描述】:
我正在尝试将 json 转换为 excel 文件,但我遇到了类型错误,我无法解决该错误。 该代码首先将 json 数据转换为字典(对于剪辑事实,我也需要它),然后将其转换为 excel。 任何帮助,将不胜感激。先感谢您。 以下是我的代码:
from io import BytesIO
import sys
sys.path.append('/usr/lib/python3/dist-packages')
import json
import openpyxl
from openpyxl import load_workbook
import logging
logger = logging.getLogger('ExtractJSON')
class ExtractJSON(object):
def __init__(self):
""" Initialisierung """
pass
def convert(self, data):
""" PDF Extract API """
#jsondata = self.json2excel(data)
jsondata = self.json2csv(data)
return jsondata
#=======================================================================
# JSON to Excel
#=======================================================================
def json2excel(self, data):
""" JSON to Excel """
header = ["Seite","ID","Text","Pfad","Fett","Kursiv","Unterstrichen","Versal","PT","Alignment","BlockAlign","InlineAlign","Col","Row","Width",\
"Height","LineHeight","SpaceAfter","Placement","Bemerkung","Bounds","ClipBounds","BBox","Font","HasClip","Lang"]
jsondata = json.loads(data)
elements = jsondata['elements']
excelrow = 1
sid = 0
hd = 1
wb = openpyxl.Workbook()
sheet = wb.active
for head in header:
sheet.cell(excelrow, hd).value = head
hd += 1
for elm in elements:
sid += 1
excelrow += 1
cl = 0
row = self.createRow(elm, sid)
if row is None:
sheet.cell(excelrow, 1).value = "....."
continue
for column in header: # TODO: enumerate!
cl += 1
sheet.cell(excelrow, cl).value = row.get(column, '?????')
kids = elm.get('Kids', [])
kid = 0
for kidelm in kids:
kid += 1
excelrow += 1
cl = 0
sidstr = f"{sid}.{kid}"
row = self.createRow(kidelm, sidstr)
for column in header: # TODO: enumerate! # TODO: refactor
cl += 1
sheet.cell(excelrow, cl).value = row.get(column, '?????')
# FIXME: Check: Do we have recursive 'Kids' in the JSON file?
wbresult = BytesIO()
wb.save(wbresult)
return wbresult.getvalue()
def createRow(self, elm, sid):
""" For Excel """
try:
page = elm.get('Page', '')
if page or page == 0:
page += 1
xpath = elm['Path']
text = elm.get('Text', elm.get('text', ''))
font = elm.get('Font', {})
if not text and not font and xpath.find("Figure") > -1: # TODO temporary
logger.info("Figure found, no font and no text, continue..")
return ""
bold = "x" if font.get('weight', '') == 700 else ''
italic = "x" if font.get('italic', '') else ''
underline = font.get('underline', '') # TODO No example yet
versal = font.get('capital', '') # TODO No example yet
fontsize = elm.get('TextSize', '')
if fontsize and (abs(round(fontsize) - fontsize) < 0.10):
fontsize = round(fontsize)
attributes = elm.get('attributes', {})
alignment = attributes.get('TextAlign', '')
lineHeight = attributes.get('LineHeight', '')
spaceAfter = attributes.get('SpaceAfter', '')
placement = attributes.get('Placement', '')
note = ""
bounds = elm.get('Bounds', '')
clipBounds = elm.get('ClipBounds', '')
if bounds:
bounds = ", ".join([str(round(bd)) for bd in bounds])
if clipBounds:
clipBounds = ", ".join([str(round(bd)) for bd in clipBounds])
font = str(font)
hasClip = elm.get('HasClip', '')
lang = elm.get('Lang', '')
# Tabelle
blockAlign = attributes.get('BlockAlign', '')
inlineAlign = attributes.get('InlineAlign', '')
col = attributes.get('ColIndex', '')
row = attributes.get('RowIndex', '')
if col or col == 0:
col += 1
if row or row == 0:
row += 1
width = attributes.get('width', '')
height = attributes.get('height', '')
bBox = attributes.get('BBox', '')
if bBox:
bBox = ", ".join([str(round(bd)) for bd in bBox])
#
result = {'Seite':page, 'ID':sid, 'Text':text, 'Pfad':xpath, 'Fett':bold, 'Kursiv':italic, 'Unterstrichen':underline,\
'Versal':versal, 'PT':fontsize, 'Alignment':alignment, 'BlockAlign':blockAlign, 'InlineAlign':inlineAlign,\
'Col':col, 'Row':row, 'Width':width, 'Height':height, 'LineHeight':lineHeight, 'SpaceAfter':spaceAfter,\
'Placement':placement, 'Bemerkung':note, 'Bounds':bounds, 'ClipBounds':clipBounds, 'BBox':bBox,\
'Font':font, 'HasClip':hasClip, 'Lang':lang}
except Exception as e:
logger.exception(e)
logger.info(elm)
return ""
return result
#=======================================================================
# JSON to CSV (Alternative for JSON to Excel)
#=======================================================================
def json2csv(self, data):
""" JSON to CSV """
result = "Seite;ID;Text;Pfad;Fett;Kursiv;Unterstrichen;Versal;PT;Alignment;BlockAlign;InlineAlign;Col;Row;Width;Height;LineHeight;SpaceAfter;Placement;"\
"Bemerkung;Bounds;ClipBounds;BBox;Font;HasClip;Lang\n"
jsondata = json.loads(data)
elements = jsondata['elements']
sid = 0
for elm in elements:
sid += 1
result += self.createRow(elm, sid)
kids = elm.get('Kids', [])
kid = 0
for kidelm in kids:
kid += 1
sidstr = f"{sid}.{kid}"
result += self.createRowForCSV(kidelm, sidstr)
filename=json2excel.csv")
return result
def createRowForCSV(self, elm, sid):
""" """
try:
page = elm.get('Page', '')
if page or page == 0:
page += 1
xpath = elm['Path']
text = elm.get('Text', elm.get('text', ''))
font = elm.get('Font', {})
if not text and not font and xpath.find("Figure") > -1: # TODO temporary
logger.info("Figure found, no font and no text, continue..")
return ""
bold = "x" if font.get('weight', '') == 700 else ''
italic = "x" if font.get('italic', '') else ''
underline = font.get('underline', '') # TODO No example yet
versal = font.get('capital', '') # TODO No example yet
fontsize = elm.get('TextSize', '')
if fontsize and (abs(round(fontsize) - fontsize) < 0.10):
fontsize = round(fontsize)
attributes = elm.get('attributes', {})
alignment = attributes.get('TextAlign', '')
lineHeight = attributes.get('LineHeight', '')
spaceAfter = attributes.get('SpaceAfter', '')
placement = attributes.get('Placement', '')
note = ""
bounds = elm.get('Bounds', '')
clipBounds = elm.get('ClipBounds', '')
if bounds:
bounds = ", ".join([str(round(bd)) for bd in bounds])
if clipBounds:
clipBounds = ", ".join([str(round(bd)) for bd in clipBounds])
font = str(font)
hasClip = elm.get('HasClip', '')
lang = elm.get('Lang', '')
# Tabelle
blockAlign = attributes.get('BlockAlign', '')
inlineAlign = attributes.get('InlineAlign', '')
col = attributes.get('ColIndex', '')
row = attributes.get('RowIndex', '')
if col or col == 0:
col += 1
if row or row == 0:
row += 1
width = attributes.get('width', '')
height = attributes.get('height', '')
bBox = attributes.get('BBox', '')
if bBox:
bBox = ", ".join([str(round(bd)) for bd in bBox])
#
result = f"{page};'{sid}';'{text}';{xpath};{bold};{italic};{underline};{versal};{fontsize};{alignment};{blockAlign};{inlineAlign};{col};{row};\
{width};{height};'{lineHeight}';'{spaceAfter}';{placement};{note};{bounds};{clipBounds};{bBox};{font};{hasClip};{lang}\n"
except Exception as e:
logger.exception(e)
logger.info(elm)
return ""
return result
INPUTPATH = "/home/abc/Clips/JSON/structuredData.json"
OUTPUTPATH = "/home/abc/Clips/JSON/result.xlsx"
if __name__ == '__main__':
converter = ExtractJSON()
f = open(INPUTPATH, "r"); data = f.read(); f.close()
print ("JSON converting..")
result = converter.convert(data)
g = open(OUTPUTPATH, "wb"); g.write(result); g.close()
print (f"Excel stored on {OUTPUTPATH}")
sys.exit(0)
我得到的错误是:
JSON converting..
Traceback (most recent call last):
File "/home/abc/Clips/JSON/json2excel.py", line 222, in <module>
result = converter.convert(data)
File "/home/abc/Clips/JSON/json2excel.py", line 23, in convert
jsondata = self.json2csv(data)
File "/home/abc/Clips/JSON/json2excel.py", line 146, in json2csv
result += self.createRow(elm, sid)
TypeError: can only concatenate str (not "dict") to str
【问题讨论】:
-
在第 146 行,
result是一个字符串,但是从self.createRow(...)返回的值是一个字典(除非你得到一个异常,它返回 "")