最近需要一个 txt 文件的批量转码功能,在网上找到一段批量处理 Java 源文件的 Python 程序(下面的版本匹配的是 .txt 文件),如下:
# -*- coding: utf-8 -*-
"""Batch-convert every ``.txt`` file under the current directory to UTF-8.

Each file is copied to ``<name>.bak`` before it is overwritten;
``process_bak_files`` restores or deletes those backups afterwards.
The code avoids version-specific syntax so it runs on Python 2.7 and 3.
"""

import codecs
import os
import re
import shutil

import chardet

# Matches the ``.bak`` suffix of a backup while capturing the original
# extension, so restoring keeps that extension's exact case.
_BAK_SUFFIX = re.compile(r'(\.txt)\.bak$', re.IGNORECASE)


def convert_encoding(filename, target_encoding):
    """Rewrite *filename* in place using *target_encoding*.

    The source encoding is guessed by ``chardet`` from the file's raw bytes.
    A copy of the original is saved as ``filename + '.bak'`` immediately
    before the file is overwritten. Empty files are left untouched.

    Args:
        filename: path of the file to convert.
        target_encoding: name of the codec to write the file with.

    Raises:
        LookupError: if *target_encoding* or the detected encoding is not a
            codec known to Python.
        ValueError: if chardet cannot determine the source encoding.
        UnicodeError: if the content cannot be decoded or re-encoded.
        IOError, OSError: on file-system failures.
    """
    # Reject a misspelled target codec before anything on disk is touched.
    codecs.lookup(target_encoding)

    # chardet works on bytes, and binary mode reads the same on Python 2 and 3.
    with open(filename, 'rb') as src:
        raw = src.read()
    if not raw:
        # Nothing to convert; chardet reports no encoding for empty input.
        return

    source_encoding = chardet.detect(raw)['encoding']
    print('%s %s' % (source_encoding, filename))
    if source_encoding is None:
        raise ValueError('cannot detect the encoding of %r' % (filename,))

    # Decode and re-encode fully in memory first, so a codec failure can
    # never leave the file truncated or half-written.
    encoded = raw.decode(source_encoding).encode(target_encoding)

    # Back up the original only once the conversion is known to succeed.
    shutil.copyfile(filename, filename + '.bak')
    with open(filename, 'wb') as dst:
        dst.write(encoded)


def main():
    """Convert every ``.txt`` file below the current directory to UTF-8.

    A file that fails to convert is reported together with the error, and
    the walk continues with the remaining files.
    """
    for root, _dirs, files in os.walk(os.getcwd()):
        for f in files:
            if not f.lower().endswith('.txt'):
                continue
            filename = os.path.join(root, f)
            try:
                convert_encoding(filename, 'utf-8')
            except Exception as e:
                # Best-effort batch job: report the failure and keep going.
                print('%s: %r' % (filename, e))


def process_bak_files(action='restore'):
    """Restore or delete the ``.txt.bak`` backups below the current directory.

    Args:
        action: ``'restore'`` moves each backup back over the file it was
            copied from; ``'clear'`` deletes the backup.

    Raises:
        ValueError: if *action* is neither ``'restore'`` nor ``'clear'``.
    """
    if action not in ('restore', 'clear'):
        raise ValueError("action must be 'restore' or 'clear', got %r"
                         % (action,))

    for root, _dirs, files in os.walk(os.getcwd()):
        for f in files:
            if not f.lower().endswith('.txt.bak'):
                continue
            source = os.path.join(root, f)
            try:
                if action == 'restore':
                    # Strip only ".bak" so the restored name is exactly the
                    # name the backup was made from (e.g. "A.TXT", not "A.txt").
                    target = os.path.join(root, _BAK_SUFFIX.sub(r'\1', f))
                    shutil.move(source, target)
                else:
                    os.remove(source)
            except Exception as e:
                # Best-effort batch job: report the failure and keep going.
                print('%s: %r' % (source, e))


if __name__ == '__main__':
    # To undo or clean up after a run, call process_bak_files(action='restore')
    # or process_bak_files(action='clear') here instead of main().
    main()