我和同事分别用 .NET 和 Python 来实现 PDF 转图片的功能。
做好后发现 .NET 版本转换很慢,而 Python 版本代码少、速度快,最终采用了我用 Python 写的代码。
比较特殊的是需要用到 poppler-0.68.0(pdf2image 依赖它来解析 PDF),安装方法大家可以参考下百度。
具体代码如下:
from pdf2image import convert_from_path
from pathlib import Path
from os import listdir
from PIL import Image
import os,time
from shutil import copyfile
import shutil
def _pdf_stem(filename):
    """Return ``filename`` without its ``.pdf`` extension, or ``None``.

    The extension check is case-insensitive.  Only the final extension is
    removed, so ``report.v1.pdf`` yields ``report.v1``; this keeps PDFs whose
    names differ only after the first dot from colliding on one output name.
    ``None`` is returned for anything that is not a PDF file name, including
    a bare ``.pdf`` (``os.path.splitext`` treats it as having no extension).
    """
    stem, ext = os.path.splitext(filename)
    if ext.upper() != ".PDF":
        return None
    return stem


def pdf_to_image(pdf_filename):
    """Convert one PDF from ``path_to_watch`` into a single merged JPEG.

    Each page is rendered at 400 DPI into a temporary sub-directory named
    after the PDF (without extension), the pages are stitched together by
    ``contact_image`` (which writes ``<sub-directory>.jpg``), and the
    temporary sub-directory is removed afterwards, even if a step fails.

    Args:
        pdf_filename: File name relative to ``path_to_watch``.  Names that
            do not end in ``.pdf`` (any case) are ignored.

    Returns:
        None.
    """
    stem = _pdf_stem(pdf_filename)
    if stem is None:
        return

    print('处理 pdf_filename:', pdf_filename)
    print('out_path', stem)
    out_path_full = os.path.join(path_to_watch, stem)
    print('完整路径:', out_path_full)
    if not os.path.isdir(out_path_full):
        print('创建目录:', out_path_full)
        os.makedirs(out_path_full, exist_ok=True)

    print('开始转换')
    pdf_filename = os.path.join(path_to_watch, pdf_filename)
    print('filename:', pdf_filename)
    try:
        pages = convert_from_path(pdf_filename, dpi=400, output_folder=None,
                                  fmt="JPEG", thread_count=5)
        # Pages are saved as 1.jpg, 2.jpg, ... inside the temporary directory.
        for pindex, page in enumerate(pages, start=1):
            page.save(os.path.join(out_path_full, str(pindex) + '.jpg'))
        # NOTE(review): pause kept from the original implementation; its
        # purpose is not evident from the code (presumably lets the
        # filesystem settle) - confirm whether it is still needed.
        time.sleep(1)
        print('转换完成')
        contact_image(out_path_full)
        print('合并完成')
    finally:
        # Delete exactly the directory created above.  Re-deriving it by
        # splitting the full path on '.' would point at the wrong directory
        # whenever path_to_watch itself contains a dot.
        print('删除目录', out_path_full)
        shutil.rmtree(out_path_full)


def watch():
    """Poll ``path_to_watch`` forever and convert PDFs that lack a JPEG.

    Every 3 seconds the directory is listed; for each ``*.pdf`` file whose
    sibling ``<stem>.jpg`` does not exist yet, ``pdf_to_image`` is called.
    The stem is computed with the same helper ``pdf_to_image`` uses, so the
    existence check matches the file that conversion produces.

    This function never returns.
    """
    # An earlier before/after directory-diff approach was replaced by this
    # simple polling scan.
    while True:
        time.sleep(3)
        for f in os.listdir(path_to_watch):
            stem = _pdf_stem(f)
            if stem is None:
                continue
            f_full = os.path.join(path_to_watch, f)
            f_jpg_full = os.path.join(path_to_watch, stem + '.jpg')
            print(f_jpg_full)
            if not os.path.exists(f_jpg_full):
                print(f_full)
                time.sleep(1)
                print('文件名:', f_full)
                pdf_to_image(f)
def open_image(out_path_full, fn):
    """Open the image file ``fn`` located in ``out_path_full``.

    Args:
        out_path_full: Directory that contains the image.
        fn: File name of the image inside that directory.

    Returns:
        The object returned by ``Image.open`` for the joined path.
    """
    target = os.path.join(out_path_full, fn)
    print('打开图片路径', target)
    picture = Image.open(target)
    return picture
def _page_order(filename):
    """Sort key that puts numerically named pages (1.jpg, 2.jpg, ...) in page order."""
    stem = os.path.splitext(filename)[0]
    if stem.isdecimal():
        return (0, int(stem), filename)
    # Non-numeric names sort after all numbered pages, alphabetically.
    return (1, 0, filename)


def contact_image(out_path_full):
    """Stack every ``.jpg`` in ``out_path_full`` vertically into one JPEG.

    Pages are taken in numeric file-name order, resized to a fixed width of
    1102 px (1564 px high when the page is taller than wide, otherwise
    776 px high), pasted top to bottom on a white canvas, and saved once as
    ``out_path_full + ".jpg"``.

    Args:
        out_path_full: Directory holding the per-page ``.jpg`` files.

    Returns:
        None.  Nothing is written when the directory has no ``.jpg`` files.
    """
    print('开始合并')
    print('合并路径:', out_path_full)
    # listdir order is arbitrary and '10.jpg' sorts before '2.jpg' as text,
    # so order the pages by their numeric name.
    page_names = sorted(
        (fn for fn in listdir(out_path_full) if fn.endswith('.jpg')),
        key=_page_order,
    )
    print('图片数量:', len(page_names))
    if not page_names:
        # Nothing to merge; avoids indexing into an empty list below.
        return

    target_width = 1102
    portrait_height = 1564
    landscape_height = 776

    images = []
    total_height = 0
    for fn in page_names:
        # The context manager closes the source file once the resized copy
        # has been made, so the directory can be deleted afterwards.
        with open_image(out_path_full, fn) as page:
            # Orientation is decided from this page's own size.
            width, height = page.size
            print('width %d,height %d ' % (width, height))
            if height > width:
                target_height = portrait_height
            else:
                target_height = landscape_height
            images.append(
                page.resize((target_width, target_height), Image.BILINEAR)
            )
        total_height += target_height

    result = Image.new(images[0].mode, (target_width, total_height), "white")
    print('total_height:', total_height)
    save_path = out_path_full + ".jpg"
    print('save path:', save_path)

    offset = 0
    for im in images:
        height_im = im.size[1]
        print('height_im %d' % height_im)
        result.paste(im, box=(0, offset))
        offset += height_im
    # Write the merged image once, after all pages have been pasted.
    result.save(save_path)
# Directory that watch() polls for incoming PDF files; the merged JPEGs are
# written next to the PDFs in this same directory.
path_to_watch = "D:\\PDFS"
print('监听目录:', path_to_watch)

# Start the polling loop only when run as a script, not when imported.
if __name__ == '__main__':
    watch()