1.读excel
# Print the rows of one worksheet of an Excel workbook using xlrd.
# NOTE(review): xlrd 2.0+ reportedly reads only legacy .xls files — confirm the
# installed xlrd version can actually open this .xlsx workbook.
import xlrd

# Exclusive upper bound on the row index that gets printed (kept from the
# original snippet, which looped over range(1, 81)).
MAX_ROW = 81

# Open the workbook and select the worksheet by its name.
data = xlrd.open_workbook("Gitee.xlsx")
table = data.sheet_by_name("程序开发")

# Handy inspection calls:
#   table.nrows          -> number of rows
#   table.ncols          -> number of columns
#   table.row_values(1)  -> values of the row at index 1

# Start at index 1 (row 0 is skipped) and stop at the sheet's real row count,
# so a sheet shorter than MAX_ROW rows no longer raises IndexError.
for i in range(1, min(MAX_ROW, table.nrows)):
    print(table.row_values(i))
2.写excel ---openpyxl
# Write a new Excel workbook with openpyxl.
from openpyxl import Workbook

# Create the workbook first.
wb = Workbook()

# Create worksheets; the optional second argument is the insert position.
wb1 = wb.create_sheet('index3', 0)
wb2 = wb.create_sheet('index1')

# A worksheet can be renamed later through its ``title`` attribute.
wb1.title = 'login'

# Cells can be addressed by coordinate string or by row/column numbers.
wb1['A3'] = 666
wb1['A4'] = 444
wb1.cell(row=6, column=3, value=88888888)

# A string starting with '=' is stored as a formula.
wb1['A5'] = '=sum(A3:A4)'

# Example of writing to the second sheet (left disabled in the original):
# wb2['G6'] = 999

# append() adds each list as a new row below the current last row.
wb1.append(['username', 'age', 'hobby'])
wb1.append(['nick', 28, ])
wb1.append(['nick', '', '秃头'])

# Save the newly built workbook to disk.
wb.save('吴凯.xlsx')
3.写excel ---xlwt
# Write a legacy .xls workbook with xlwt.
import xlwt

# Create the workbook with an explicit text encoding.
workbook = xlwt.Workbook(encoding='utf-8')

# Add a worksheet to it.
worksheet = workbook.add_sheet('sheet')

# write(row, column, label=...) — both indices are given as 0-based here.
worksheet.write(0, 0, label='核心参数')
worksheet.write(0, 1, label='参数1')

# Persist the workbook to disk.
workbook.save('lichuang.xls')
4.with open 操作csv ---csv
# Append one row to a CSV file with the standard-library csv module.
import csv

# Mode 'a' appends to data.csv (creating it if missing); newline="" leaves
# line-ending handling to csv.writer instead of the text layer.
with open('data.csv', 'a', newline="", encoding='utf-8') as filecsv:
    csvwriter = csv.writer(filecsv, delimiter=',')
    csvwriter.writerow(['凯帅', '美女', '人才'])
5.操作word文档
# Read a Word document with python-docx and print its paragraphs.
import docx

# Raw string: the Windows path is full of backslashes that must stay literal
# (the non-raw form relied on invalid escape sequences such as "\c").
file = docx.Document(r"D:\code\客户\脚本制作_guojieli\出院小结.docx")

# Read the paragraph list once and reuse it below.
paragraphs = file.paragraphs
print("段落数:", str(len(paragraphs)))

# Collect the text of every paragraph.
list1 = [para.text for para in paragraphs]

# Print each paragraph together with its 0-based index.
for i, para in enumerate(paragraphs):
    print("第:" + str(i) + "段的内容是:" + para.text)
6.excel格式操作
# Cell formatting with openpyxl: merging, fill colour, alignment, sizes.
from openpyxl import Workbook
from openpyxl.styles import Alignment, PatternFill

wb = Workbook()
wb1 = wb.create_sheet('sh', 0)

# Merge a cell range; other ranges such as "A2:C2" or "A1:D2" work the same way.
wb1.merge_cells("A1:A2")

# Assign a value to the top-left cell of the merged range.
wb1.cell(1, 1).value = '你就是个大石坝'

# Solid background colour for the cell.
wb1['A1'].fill = PatternFill(fgColor="00C0C0C0", fill_type='solid')

# Centre the content both horizontally and vertically.
wb1['A1'].alignment = Alignment(horizontal="center", vertical="center")

# Row height is set per row number.
wb1.row_dimensions[3].height = 20

# Column width is keyed by column letter; the original used the cell
# reference 'D1', which is not a column name.
wb1.column_dimensions['D'].width = 30

# max_row reports the last used row; it would grow after an append such as
# wb1.append(["美女"]), which the original left disabled.
nows = wb1.max_row
print(nows)

# Second reading, originally meant to follow a disabled wb1.append(["帅哥"]).
nows = wb1.max_row
print(nows)

wb.save('data.xlsx')
7.操作pdf
# Extract text from a PDF with pdfminer.
# NOTE(review): this import layout (PDFDocument from pdfminer.pdfparser,
# doc.initialize(), doc.get_pages()) looks like the pdfminer3k API rather
# than pdfminer.six — confirm which package is installed.
from pdfminer.pdfparser import PDFParser, PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LTTextBoxHorizontal, LAParams
from pdfminer.pdfinterp import PDFTextExtractionNotAllowed


def parse(DataIO):
    """Collect the text of every horizontal text box in a PDF.

    Args:
        DataIO: Binary file object containing the PDF data.

    Returns:
        The concatenated text of all LTTextBoxHorizontal items, in page order.
        (The original built this text per page and then discarded it.)

    Raises:
        PDFTextExtractionNotAllowed: If the document reports that it is not
            extractable.
    """
    # Build a parser from the file object and an empty document, then link
    # the two to each other.
    parser = PDFParser(DataIO)
    doc = PDFDocument()
    parser.set_document(doc)
    doc.set_parser(parser)

    # Supply the initial password; none is given, so the default is used.
    doc.initialize()

    # Refuse documents that do not allow text extraction.
    if not doc.is_extractable:
        raise PDFTextExtractionNotAllowed

    # The resource manager holds shared resources; the aggregator device
    # turns each processed page into a layout object.
    rsrcmagr = PDFResourceManager()
    laparams = LAParams()
    device = PDFPageAggregator(rsrcmagr, laparams=laparams)
    interpreter = PDFPageInterpreter(rsrcmagr, device)

    chunks = []
    for page in doc.get_pages():
        interpreter.process_page(page)
        # Layout object of the page that was just processed.
        layout = device.get_result()
        for x in layout:
            try:
                if isinstance(x, LTTextBoxHorizontal):
                    chunks.append(x.get_text())
            except Exception:
                # Best effort: report the failed item and keep going.
                print("Failed")
    return "".join(chunks)


def main():
    """Open the sample PDF in binary mode and run parse() on it."""
    with open(r'D:\code\客户\pdf转excel提取_穷人也要买东东\Supplier release 232492 20200807201958.PDF', 'rb') as pdf_html:
        parse(pdf_html)


main()