cutepig

写了一个自动用google翻译文档的工具

写了一个自动调用 Google 翻译来翻译 Word 文档的小工具（基于 googletrans 和 python-docx）。下面先给出逐段翻译的第一版，再给出按批翻译、支持整个目录的改进版。

features:

  • [x] 支持 Word（.docx）文档
  • [x] 在每一个段落下面附上对照的译文
from googletrans import Translator
import sys
import docx

# Input document: the first command-line argument, or the author's sample
# file when no argument is given.
fname = sys.argv[1] if len(
    sys.argv) > 1 else r'F:\GoogleDriveSync3\jobrelated\The Fast Forward MBA in Project Management ( PDFDrive.com ).full.docx'


translator = Translator()
foname = fname + '-cn.docx'
# The file is opened twice: `doc` is only read for the source text, while
# `docdes` receives the translations and is what gets saved.
doc = docx.Document(fname)
docdes = docx.Document(fname)

# Fetch both paragraph lists once instead of re-reading the `paragraphs`
# property on every index access inside the loop.
src_paragraphs = doc.paragraphs
dst_paragraphs = docdes.paragraphs

N = len(src_paragraphs)
for i, (src_par, dst_par) in enumerate(zip(src_paragraphs, dst_paragraphs)):
    print(1.0 * i / N)  # progress as a fraction of all paragraphs
    subCont = src_par.text
    if not subCont.strip():
        # Nothing to translate; skip the network round trip for blank paragraphs.
        continue
    try:
        s = translator.translate(subCont, src='en', dest='zh-cn')
        # Append the translation as an extra run at the end of the paragraph
        # so it appears directly below the original text.
        dst_par.add_run('\n' + str(s.text) + '\n')
    except Exception as e:
        # Best effort: report the failure and carry on with the next
        # paragraph so one bad request does not lose the whole document.
        print('except:', e)

docdes.save(foname)

from googletrans import Translator
import sys, os
import docx

# Path to translate: the first command-line argument, or the author's sample
# document when no argument is given. May name a single .docx file or a
# directory of them (see the __main__ block).
fname = sys.argv[1] if len(
    sys.argv) > 1 else r'D:\Users\cutep\Downloads\Throw-Away-the-First-90-Days.docx'

def trans(fname):
    """Translate a Word document from English to Simplified Chinese.

    The file is opened twice with python-docx: one copy is read for the
    source text, the other receives each translation as an extra run
    appended to the matching paragraph. Paragraphs are sent to the
    translator in batches joined by a separator line, and the reply is
    split on that separator to recover one translation per paragraph.

    A checkpoint copy named ``<fname>-<fraction>-cn.docx`` is saved whenever
    progress exceeds the next 10% threshold; the final result is saved to
    ``<fname>-cn.docx``.

    Args:
        fname: Path of the .docx file to translate.
    """
    translator = Translator()
    foname = fname + '-cn.docx'
    doc = docx.Document(fname)
    docdes = docx.Document(fname)

    spacer = '\n========================\n'
    spacer_short = '========================'
    # Upper bound on the characters sent in one request (the original
    # threshold; presumably chosen to stay under the service's per-request
    # limit -- TODO confirm).
    max_chars = 4500

    # Read the paragraph lists once instead of re-reading the `paragraphs`
    # property on every index access.
    texts = [p.text for p in doc.paragraphs]
    targets = docdes.paragraphs

    N = len(texts)
    NextTarget = 0.1
    i = 0
    while i < N:
        percentage = 1.0 * i / N
        if i % 10 == 0:
            print(percentage)
        if percentage > NextTarget:
            # Save a checkpoint so partial work survives a later failure.
            outputfile = '%s-%.2f-cn.docx' % (fname, NextTarget)
            print(outputfile)
            docdes.save(outputfile)
            NextTarget = NextTarget + 0.1

        # Build the batch [i, j): always take paragraph i, then keep adding
        # paragraphs only while the joined text stays within max_chars.
        # Checking before appending prevents overshooting the limit by a
        # whole paragraph.
        batch = [texts[i]]
        size = len(texts[i])
        j = i + 1
        while j < N and size + len(spacer) + len(texts[j]) <= max_chars:
            batch.append(texts[j])
            size += len(spacer) + len(texts[j])
            j = j + 1
        subCont = spacer.join(batch)
        print(i, j)

        if subCont.strip():
            s = translator.translate(subCont, src='en', dest='zh-cn')
            ss = s.text.split(spacer_short)
            if len(ss) != len(batch):
                # The separator did not survive translation intact, so the
                # pieces cannot be matched to paragraphs. Translate this
                # batch one paragraph at a time instead of aborting the run.
                print('separator mismatch: %d, %d' % (len(ss), len(batch)))
                ss = [
                    translator.translate(text, src='en', dest='zh-cn').text
                    if text.strip() else ''
                    for text in batch
                ]
            for k, piece in enumerate(ss):
                # Strip the newlines that belonged to the separator so they
                # do not end up as extra blank lines in the document.
                targets[i + k].add_run('\n' + piece.strip() + '\n')
        i = j

    docdes.save(foname)

if __name__ == '__main__':
    if os.path.isfile(fname):
        # A single document: translate it in this process.
        trans(fname)
    else:
        # Otherwise fname is treated as a directory: translate every .docx
        # file in it, one worker process per file.
        from multiprocessing import Process

        ps = []
        for filename in os.listdir(fname):
            lowered = filename.lower()
            if not lowered.endswith('.docx'):
                continue
            if lowered.endswith('-cn.docx'):
                # Output or checkpoint of an earlier run; do not translate
                # an already translated document again.
                continue
            # os.path.join instead of a hard-coded backslash keeps the path
            # valid on any platform.
            p = Process(target=trans, args=(os.path.join(fname, filename),))
            p.start()
            ps.append(p)

        # Wait for all workers to finish before exiting.
        for p in ps:
            p.join()

分类:

技术点:

相关文章:

  • 2022-12-23
  • 2022-01-02
  • 2021-10-12
  • 2022-02-18
  • 2021-12-13
  • 2021-07-19
  • 2021-09-08
  • 2022-01-03
猜你喜欢
  • 2022-12-23
  • 2021-12-19
  • 2021-10-31
  • 2021-11-20
  • 2022-12-23
  • 2022-01-08
  • 2021-05-11
相关资源
相似解决方案