写了一个自动用google翻译文档的工具
这是一个自动调用 Google 翻译来翻译文档的工具。
features:
- [x] 支持word
- [x] 每一个段落下面放上对照的翻译
from googletrans import Translator
import sys
import docx
# Document to translate: the first command-line argument when one is given,
# otherwise a hard-coded default path.
if len(sys.argv) > 1:
    fname = sys.argv[1]
else:
    fname = r'F:\GoogleDriveSync3\jobrelated\The Fast Forward MBA in Project Management ( PDFDrive.com ).full.docx'

translator = Translator()
foname = fname + '-cn.docx'

# The same file is opened twice: `doc` is only read from, while `docdes`
# receives the translated runs and is the copy that gets saved.
doc = docx.Document(fname)
docdes = docx.Document(fname)

N = len(doc.paragraphs)
for i, paragraph in enumerate(doc.paragraphs):
    # Progress as a fraction of the paragraphs processed so far.
    print(1.0 * i / N,)
    subCont = paragraph.text
    try:
        s = translator.translate(subCont, src='en', dest='zh-cn')
        # Put the translation right below the original text of the paragraph.
        docdes.paragraphs[i].add_run('\n' + str(s.text) + '\n')
    except Exception as e:
        # Best effort: report the failure and carry on with the next paragraph.
        print('except:', e)

docdes.save(foname)
from googletrans import Translator
import sys, os
import docx
# Path to process: the first command-line argument when one is given,
# otherwise a hard-coded default document.
if len(sys.argv) > 1:
    fname = sys.argv[1]
else:
    fname = r'D:\Users\cutep\Downloads\Throw-Away-the-First-90-Days.docx'
def trans(fname):
    """Translate a .docx file from English to Simplified Chinese.

    Each translated paragraph is appended, as an extra run, to the
    corresponding paragraph of a second copy of the document, so the
    translation sits directly below the original text. Consecutive
    paragraphs are joined with a separator line and sent to the translator
    as one request; the reply is split on the same separator to recover the
    individual paragraphs.

    Output files:
      * ``fname + '-cn.docx'`` -- the final result.
      * ``'<fname>-<target>-cn.docx'`` -- an intermediate snapshot saved each
        time the progress fraction passes the next target (0.1, 0.2, ...).

    Args:
        fname: Path of the .docx file to translate.

    Raises:
        AssertionError: If the number of pieces in the translated text does
            not match the number of paragraphs that were sent, i.e. the
            separator did not come back intact.
    """
    translator = Translator()
    foname = fname + '-cn.docx'
    doc = docx.Document(fname)
    docdes = docx.Document(fname)

    # Look the paragraph lists up once instead of on every index; only runs
    # are appended below, so the set of paragraphs does not change.
    src_paragraphs = doc.paragraphs
    dst_paragraphs = docdes.paragraphs
    N = len(src_paragraphs)

    # Separator placed between paragraphs of one request, and the bare
    # marker used to split the translated reply again.
    spacer = '\n========================\n'
    spacer_short = '========================'

    NextTarget = 0.1
    i = 0
    while i < N:
        percentage = 1.0 * i / N
        if i % 10 == 0:
            print(percentage)
        if percentage > NextTarget:
            # Save a snapshot of the work done so far.
            outputfile = '%s-%.2f-cn.docx' % (fname, NextTarget)
            print(outputfile)
            docdes.save(outputfile)
            NextTarget = NextTarget + 0.1

        # Gather paragraphs i..j-1 into one request. The length is tested
        # before each append, so the final text may exceed 4500 characters.
        subCont = src_paragraphs[i].text
        j = i + 1
        while len(subCont) < 4500 and j < N:
            subCont = subCont + spacer + src_paragraphs[j].text
            j = j + 1
        print(i, j)

        if subCont.strip():
            # Translation errors are not caught here; they propagate to the
            # caller.
            s = translator.translate(subCont, src='en', dest='zh-cn')
            ss = s.text.split(spacer_short)
            # Explicit raise rather than `assert`, so the check still runs
            # under `python -O` and translations are never attached to the
            # wrong paragraphs.
            if len(ss) != j - i:
                raise AssertionError('%d, %d' % (len(ss), j - i))
            for k in range(j - i):
                dst_paragraphs[k + i].add_run('\n' + ss[k] + '\n')
        i = j

    docdes.save(foname)
if __name__ == '__main__':
    if os.path.isfile(fname):
        # A single document: translate it in this process.
        trans(fname)
    else:
        # Otherwise fname is listed as a directory and every .docx file in
        # it is translated, one worker process per file.
        # NOTE(review): the filter also matches files whose names end in
        # '-cn.docx' -- confirm whether earlier outputs should be skipped.
        from multiprocessing import Process
        ps = []
        for filename in os.listdir(fname):
            if filename.lower().endswith('.docx'):
                # os.path.join instead of a hard-coded '\\' so the path is
                # also valid on non-Windows systems.
                p = Process(target=trans,
                            args=(os.path.join(fname, filename),))
                p.start()
                ps.append(p)
        # Wait for all workers to finish.
        for p in ps:
            p.join()