这是一个使用 pathlib.Path 而不是依赖 os.walk 的实现。它在迭代之前对目录内容进行排序,因此它应该可以在多个平台上重复。它还使用文件/目录的名称更新哈希,因此添加空文件和目录将更改哈希。
带有类型注释的版本(Python 3.6 或更高版本):
import hashlib
from _hashlib import HASH as Hash
from pathlib import Path
from typing import Union
def md5_update_from_file(filename: Union[str, Path], hash: Hash) -> Hash:
assert Path(filename).is_file()
with open(str(filename), "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash.update(chunk)
return hash
def md5_file(filename: Union[str, Path]) -> str:
return str(md5_update_from_file(filename, hashlib.md5()).hexdigest())
def md5_update_from_dir(directory: Union[str, Path], hash: Hash) -> Hash:
assert Path(directory).is_dir()
for path in sorted(Path(directory).iterdir(), key=lambda p: str(p).lower()):
hash.update(path.name.encode())
if path.is_file():
hash = md5_update_from_file(path, hash)
elif path.is_dir():
hash = md5_update_from_dir(path, hash)
return hash
def md5_dir(directory: Union[str, Path]) -> str:
return str(md5_update_from_dir(directory, hashlib.md5()).hexdigest())
没有类型注释:
import hashlib
from pathlib import Path
def md5_update_from_file(filename, hash):
assert Path(filename).is_file()
with open(str(filename), "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash.update(chunk)
return hash
def md5_file(filename):
return md5_update_from_file(filename, hashlib.md5()).hexdigest()
def md5_update_from_dir(directory, hash):
assert Path(directory).is_dir()
for path in sorted(Path(directory).iterdir()):
hash.update(path.name.encode())
if path.is_file():
hash = md5_update_from_file(path, hash)
elif path.is_dir():
hash = md5_update_from_dir(path, hash)
return hash
def md5_dir(directory):
return md5_update_from_dir(directory, hashlib.md5()).hexdigest()
如果您只需要对目录进行哈希处理,则为精简版:
def md5_update_from_dir(directory, hash):
assert Path(directory).is_dir()
for path in sorted(Path(directory).iterdir(), key=lambda p: str(p).lower()):
hash.update(path.name.encode())
if path.is_file():
with open(path, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash.update(chunk)
elif path.is_dir():
hash = md5_update_from_dir(path, hash)
return hash
def md5_dir(directory):
return md5_update_from_dir(directory, hashlib.md5()).hexdigest()
用法:md5_hash = md5_dir("/some/directory")