读取指定目录下的所有文件,提取文件中被双引号括起来的汉字串,并逐行写入 word.txt

# -*- coding: utf-8 -*-

import os
import io
import re

# Output file that collects every match, one per line. It is opened explicitly
# as UTF-8 so that writing Chinese text does not depend on the platform's
# default encoding (which would raise UnicodeEncodeError on non-UTF-8 locales).
fo = io.open("word.txt", "w", encoding="utf-8")

# Walk a directory tree and hand every file found in it to read_file.
def each_file(filepath):
  """Recursively visit `filepath` and call read_file on each file path.

  filepath: root directory to scan; os.walk descends into its subdirectories.
  """
  for folder, _subdirs, names in os.walk(filepath):
    for name in names:
      # Build the path relative to the walk root before processing the file.
      read_file(os.path.join(folder, name))

# One or more characters in the range U+4E00-U+9FFF wrapped in double quotes.
# Compiled once at import time instead of once per scanned file.
_QUOTED_HAN = re.compile('"[\u4e00-\u9fff]+"')


def read_file(filename):
  """Write every double-quoted run of Chinese characters in a file to `fo`.

  filename: path of the file to scan. It is decoded as UTF-8 and undecodable
  bytes are dropped (errors='ignore'), so binary files do not raise.

  Each match, including its surrounding double quotes, is written to the
  module-level output file `fo` on its own line.
  """
  with io.open(filename, 'r', encoding='utf-8', errors='ignore') as fn:
    text = fn.read()
  # Search the decoded text itself. The previous version searched
  # str(list_of_lines), i.e. the repr of a list, so results depended on how
  # repr escapes characters (code points it treats as non-printable are
  # rewritten as \uXXXX escapes and would no longer match).
  for val in _QUOTED_HAN.findall(text):
    fo.write(val + "\n")



if __name__ == '__main__':
  # Scan the "src" tree, then always close the output file so that buffered
  # matches are flushed to disk even if the scan raises part-way through.
  try:
    each_file("src")
  finally:
    fo.close()

相关文章:

  • 2021-12-12
  • 2022-12-23
  • 2022-12-23
  • 2021-12-29
  • 2021-07-17
  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
猜你喜欢
  • 2021-11-30
  • 2022-12-23
  • 2021-11-25
  • 2021-06-18
  • 2022-12-23
  • 2022-12-23
  • 2021-11-17
相关资源
相似解决方案