# Install the dependency first: pip install pdfplumber
import pdfplumber

# Every example below reads the first page of this PDF.
PDF_PATH = '基于python的网页爬虫.pdf'

# Example 1: extract the text of the first page.
with pdfplumber.open(PDF_PATH) as pdf:
    first_page = pdf.pages[0]
    print(first_page.extract_text())

# Example 2: extract a single table from the first page.
with pdfplumber.open(PDF_PATH) as pdf:
    first_page = pdf.pages[0]
    print(first_page.extract_table())

# Example 3: extract every table from the first page.
with pdfplumber.open(PDF_PATH) as pdf:
    first_page = pdf.pages[0]
    for table in first_page.extract_tables():
        print(table)

# Example 4: extract a single financial-statement table.
# table_settings controls how the table is detected; here both the
# vertical and horizontal strategies are set to 'text'.
with pdfplumber.open(PDF_PATH) as pdf:
    first_page = pdf.pages[0]
    # Use extract_table() (singular): the loop below walks the ROWS of one
    # table. extract_tables() would return a list of tables, and the loop
    # would then treat each whole table as if it were a row.
    table = first_page.extract_table(
        table_settings={
            'vertical_strategy': 'text',
            'horizontal_strategy': 'text',
        }
    )

    new_table = []
    # `table or []` guards against a page on which no table was found.
    for row in table or []:
        # Skip blank rows: every cell is empty or missing.
        if not any(row):
            continue
        # Merge the first three cells into one, treating missing cells as
        # empty strings, then keep the remaining cells unchanged.
        merged_head = ''.join(str(item) if item else '' for item in row[:3])
        new_table.append([merged_head, *row[3:]])
    print(new_table)