# fengting0913
import os
import magic

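# Decided to identify files this way: the libmagic description is fairly thorough, so match it
# against known keywords; when a keyword matches, that keyword is taken as the file type.
# When nothing matches, fall back to the file format determined beforehand (the extension).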

# a = magic.from_file(path)
# b = magic.from_buffer(open(path).read(1024))
# c = magic.from_file(path, mime=True)
# print(a)

# Keywords searched for in the libmagic description, grouped by category.
# A keyword found in the description is reported as the file type.
# Order matters to identify_file_type(): it keeps the LAST keyword that
# matches, so a more specific keyword must come after any shorter keyword
# it contains (e.g. '7-zip' after 'zip').
FILE_TYPE = {
    'Executables': ['exe', 'mz', 'msi', 'coff', 'elf', 'krnl', 'rpm', 'linux', 'macho'],
    'Documents': ['ps', 'rtf', 'odp', 'ods', 'odt', 'hwp', 'gul', 'ebook', 'latex'],
    'Code': ['php', 'python', 'perl', 'ruby', 'cpp', 'java', 'shell', 'pascal', 'awk', 'dyalog',
             'fortran', 'java-bytecode'],
    'Bundles': ['zip', 'gzip', 'bzip', 'rzip', 'dzip', '7-zip', 'cab', 'jar', 'rar', 'mscompress', 'ace',
                'arj', 'asd', 'blackhole', 'kgb'],
    'Other': ['bat', 'cmd'],
}


def identify_file_type(path="D:/scripts/file/CmdHelperService.7z"):
    """
    Identify the type of the file at ``path``.

    The file extension is the starting answer. Unless that extension is in
    the exclusion list, the description returned by ``magic.from_file`` is
    searched for known markers and keywords, and a hit replaces the extension.

    :param path: file to inspect. Defaults to the sample path this function
        was originally hard-coded to use, so calling it with no arguments
        behaves as before.
    :return: the detected type as a string: ``'vmdk'``, ``'dll'``, a keyword
        from ``FILE_TYPE``, or the bare extension when nothing matched.
        An empty string is returned when ``path`` has no extension.
    """
    # Extensions that are trusted as-is and never handed to libmagic.
    exclude_file_type = ('txt', 'pdf', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx', 'com', 'mf')

    extension = os.path.splitext(path)[1]
    if not extension:
        # No extension: report "unknown" as an empty string rather than
        # leaking the (root, ext) tuple produced by os.path.splitext.
        return ''

    file_type = extension.strip('.')
    if file_type in exclude_file_type:
        return file_type

    file_type_info = magic.from_file(path)
    print(file_type_info)

    # TODO: special file types that cannot be matched accurately from the
    # magic description alone are added here over time.
    # Each special case returns immediately so the generic keyword scan
    # below cannot overwrite it.
    if 'VMware4 disk image' in file_type_info:
        return 'vmdk'
    if 'DLL' in file_type_info:
        return 'dll'

    for file_types in FILE_TYPE.values():
        for f_type in file_types:
            spellings = (f_type.upper(), f_type.capitalize(), f_type)
            if any(spelling in file_type_info for spelling in spellings):
                # Deliberately no break: the last matching keyword wins, so
                # "7-zip archive data" resolves to '7-zip' rather than 'zip'.
                file_type = f_type

    return file_type


# Module-level driver: call identify_file_type() with no arguments and print
# whatever it returns. This runs every time the module is executed or imported.
file_type = identify_file_type()
print(file_type)


# 7-zip archive data, version 0.4
# 7-zip
                      

# Category:

# Technical points:

# Related articles: