【问题标题】:Mock a file system with os.walk and mock files (Python)——在 Python 中使用 os.walk 和模拟文件来模拟文件系统
【发布时间】:2022-01-18 20:34:59
【问题描述】:

我编写了一段代码,它以目录列表作为输入,并返回一个包含重复文件的字典(即使文件名不同,只要内容相同就算重复):

Key = MD5 on the bits of the file. 

Value = List of all the locations that the file is in.

问题是我需要使用模拟进行单元测试,但我不知道应该在哪里使用模拟以及如何正确地进行测试。

我尝试模拟 os.walk 并模拟文件以制作假文件系统,但它不起作用!

当我尝试使用我的 MD5 哈希函数时,它会抛出 > afile = open(path, 'rb')

FileNotFoundError: [Errno 2] No such file or directory: '/test\\mock_file'

我的代码:

import os
from pathlib import Path
import hashlib


def findDuplicate(path_arr):
    """Collect files that occur more than once across several directories.

    Every entry of ``path_arr`` is scanned with ``iterOverDirectory`` and the
    per-directory results are folded into one mapping with ``joinDict``.

    Args:
        path_arr: Iterable of directory paths to scan.

    Returns:
        A dict holding only those keys of the merged mapping whose value
        contains more than one entry.
    """
    merged = {}
    for directory in path_arr:
        found = iterOverDirectory(directory)
        joinDict(merged, found)

    # Keep only the groups that actually contain more than one location.
    duplicates = {}
    for digest, locations in merged.items():
        if len(locations) > 1:
            duplicates[digest] = locations
    return duplicates


def joinDict(dict1, dict2):
    """Merge ``dict2`` into ``dict1`` in place.

    Both arguments map a key to a set of values. For a key present in both
    mappings the values of ``dict2`` are added to the existing set in
    ``dict1``; a key present only in ``dict2`` is added to ``dict1``.

    Args:
        dict1: Destination mapping; it is mutated.
        dict2: Source mapping; it is left unchanged.

    Returns:
        None. The result is delivered through the mutation of ``dict1``.
    """
    for key, values in dict2.items():
        if key in dict1:
            # set.union() builds a new set and leaves the receiver untouched,
            # so its result would be lost; update() merges in place.
            dict1[key].update(values)
        else:
            # Store a copy so that later merges into dict1 cannot mutate the
            # sets owned by dict2.
            dict1[key] = set(values)


def iterOverDirectory(dir_path):
    """Group every file found below ``dir_path`` by its hash.

    The tree is traversed top-down with ``os.walk``; each file path is built
    with ``os.path.join`` and hashed with ``hashFile``.

    Args:
        dir_path: Directory to scan; it is wrapped in ``pathlib.Path`` before
            being handed to ``os.walk``.

    Returns:
        A dict mapping each value returned by ``hashFile`` to the set of file
        paths that produced it.
    """
    groups = {}
    for root, _dirs, files in os.walk(Path(dir_path), topdown=True):
        for name in files:
            full_path = os.path.join(root, name)
            # setdefault creates the bucket the first time a hash is seen.
            groups.setdefault(hashFile(full_path), set()).add(full_path)
    return groups

def hashFile(path):
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is opened in binary mode and read in 64 KiB chunks, so the whole
    content never has to be held in memory at once.

    Args:
        path: Path of the file to hash; anything accepted by ``open``.

    Returns:
        The MD5 digest of the file content as a hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read (for example
            ``FileNotFoundError``).
    """
    hasher = hashlib.md5()
    blocksize = 65536
    # The context manager closes the file even when read()/update() raises.
    with open(path, 'rb') as afile:
        buf = afile.read(blocksize)
        # An empty read marks end of file.
        while buf:
            hasher.update(buf)
            buf = afile.read(blocksize)
    return hasher.hexdigest()


测试:

from unittest import mock
from unittest.mock import patch, mock_open

import pytest

from main import *
@mock.patch('os.walk')
def test_find(mockwalk):
    # Attempt to fake a directory tree: os.walk is replaced by the decorator
    # for the whole test, while builtins.open is patched with mock_open only
    # inside each `with` block below.
    with patch("builtins.open", mock_open(read_data="data")) as mock_file:
        assert open("/test/mock_file").read() == "data"
    # The patch of builtins.open ended when the `with` block above exited;
    # the mock object itself is still available for call assertions.
    mock_file.assert_called_with("/test/mock_file")
    with patch("builtins.open", mock_open(read_data="data")) as mock_file2:
        assert open("/test/subtest/mock_file").read() == "data"
    mock_file2.assert_called_with("/test/subtest/mock_file")
    # Each tuple mimics one (root, dirs, files) entry produced by os.walk.
    mockwalk.return_value = [
        ('/test', ('subtest',), ("mock_file",)),
        ('/test/subtest', (), ('spam', "mock_file2")),
    ]
    # NOTE(review): this call runs outside both `with` blocks, so
    # builtins.open is no longer patched here and the code under test opens
    # the paths from the mocked os.walk with the real open(). The result is
    # stored in `ans` but never asserted on.
    ans = findDuplicate(["input"])

【问题讨论】:

  • 对于大量基于文件系统的测试,您可以使用像 pyfakefs 这样的假文件系统(免责声明:我是 pyfakefs 的贡献者)。
  • @MrBeanBremen 好吧,我试图了解如何使用它,但不是很清楚.. 你介意帮助我吗?
  • 今晚之前我没有时间,但是你检查过documentation吗?
  • @MrBeanBremen 是的,我试过了......但我不能用模拟做同样的事情吗?
  • 好的,我仔细查看了您的代码,我注意到您在调用 findDuplicate 时并没有模拟“builtins.open”。如果您使用上下文管理器进行模拟,则必须在该上下文管理器(with 代码块)内部调用被测函数。

标签: python unit-testing mocking pytest


【解决方案1】:

我在 os.walk 和我的函数上使用了 mock 来创建我自己的哈希码。

还使用 pytest 的 tmpdir 夹具(fixture)创建了一个包含文件的临时目录。

from unittest import mock
from unittest.mock import patch, mock_open
import pytest
from main import *
import os



@mock.patch(
    "main.iterOverDirectory",
    side_effect=[
        {"1": {"test/v", "test/vv"}, "2": {"test/x", "test/xx"}},
        {"1": {"test/v", "test/vy"}, "3": {"test/q"}},
    ],
)
def test_findDuplicate_mock(mock_iteroverdirectory):
    """Check findDuplicate against canned per-directory scan results.

    ``main.iterOverDirectory`` is patched so that its first and second call
    return the two dicts given in ``side_effect``. The test verifies that the
    scanner is called once per input directory and that everything
    ``findDuplicate`` reports is contained in ``expected``; it does not
    require the result to be equal to ``expected``.
    """
    directories = ["test", "test2"]
    result = findDuplicate(directories)

    mock_iteroverdirectory.assert_called()
    assert mock_iteroverdirectory.call_count == len(directories)

    expected = {"1": {"test/v", "test/vv", "test/vy"}, "2": {"test/x", "test/xx"}}
    for digest, locations in result.items():
        assert digest in expected
        for location in locations:
            assert location in expected[digest]


def mock_hash(path):
    """Return a canned hash for known paths, or the path itself otherwise.

    Used as a replacement for the real file hasher in tests: paths listed in
    the table share a label ("MH-1" or "MH-2"), while any other path is
    returned unchanged.

    Args:
        path: File path string to look up.

    Returns:
        The label stored for ``path``, or ``path`` when it is not in the table.
    """
    known_hashes = {
        "/test\\mock_file": "MH-1",
        "/test\\mock_file_copy": "MH-1",
        "/test\\mock_file2_copy": "MH-2",
        "/test/subtest1\\mock_file2": "MH-2",
        "/test/subtest2\\mock_file_copy(1)": "MH-1",
        "/test/subtest/subsubtest\\mock_file2_copy(1)": "MH-2",
    }
    return known_hashes.get(path, path)


@mock.patch('os.walk')
def test_iterOverDirectory(mockwalk):
    """Run findDuplicate over a mocked os.walk tree with a mocked hasher.

    ``os.walk`` is patched to return a fixed list of (root, dirs, files)
    tuples and ``main.hashFile`` is patched with ``mock_hash``, so no real
    file is touched. The test verifies that every reported group and path is
    contained in ``expected``.

    NOTE(review): the keys used by ``mock_hash`` and ``expected`` contain a
    backslash before the file name, which is what ``os.path.join`` produces
    on Windows only. The entry "/test/subtest/subsubtest\\mock_file2_copy(1)"
    also does not correspond to any root in the mocked walk; because only
    containment in ``expected`` is checked, that entry is not enforced.
    """
    with patch('main.hashFile') as mockhash:
        mockwalk.return_value = [
            ('/test', ('subtest1', 'subtest2'), ("mock_file", "mock_file_copy", "mock_file2_copy")),
            # The trailing comma makes `dirs` a one-element tuple rather than
            # a plain string.
            ('/test/subtest1', ('subsubtest',), ("spam", "mock_file2", "noise")),
            ('/test/subtest2', (), ("mock_file_copy(1)", "not_relevant")),
            ('/test/subtest2/subsubtest', (), ("mock_file2_copy(1)", "dirt"))
        ]
        mockhash.side_effect = mock_hash
        # findDuplicate expects a list of directories; a bare string would be
        # iterated character by character and trigger one walk per character.
        ans = findDuplicate(["/test"])
        expected = {
            "MH-1": {"/test\\mock_file", "/test\\mock_file_copy", "/test/subtest2\\mock_file_copy(1)"},
            "MH-2": {"/test\\mock_file2_copy", "/test/subtest1\\mock_file2",
                     "/test/subtest/subsubtest\\mock_file2_copy(1)"}
        }
        for a in ans.keys():
            assert a in expected
            for p in ans[a]:
                assert p in expected[a]

def create_file_system_and_exep_output(tmpdir):
    """Build a small directory tree under ``tmpdir`` for the duplicate test.

    Three files with the text "content" are created in three different
    directories (hello.txt, test.txt, panic.txt); the remaining files hold
    other texts.

    Args:
        tmpdir: Directory object offering ``mkdir`` and ``join``
            (presumably pytest's ``tmpdir`` fixture -- confirm with caller).

    Returns:
        A tuple ``(expected, directories)``: ``expected`` maps the hash of
        hello.txt to the set of the three identical files' paths, and
        ``directories`` lists the three directories that were created.
    """
    first_dir = tmpdir.mkdir("sub_dir_1")
    hello = first_dir.join("hello.txt")
    hello.write("content")
    for name, text in (("bye.txt", "contentt"), ("info.txt", "content's")):
        first_dir.join(name).write(text)

    second_dir = tmpdir.mkdir("sub_dir_2")
    test_file = second_dir.join("test.txt")
    test_file.write("content")
    first_dir.join("fake.txt").write("contents")

    nested_dir = first_dir.mkdir("sub_sub_dir_1")
    panic = nested_dir.join("panic.txt")
    panic.write("content")
    # fake.txt in sub_dir_1 is written a second time here, replacing the
    # text stored above.
    for name, text in (("fake.txt", "contentss"), ("tut.txt", "conten")):
        first_dir.join(name).write(text)

    identical_files = {hello.strpath, test_file.strpath, panic.strpath}
    expected = {hashFile(hello): identical_files}
    return expected, [first_dir, second_dir, nested_dir]


def test_create_file(tmpdir):
    """Run findDuplicate on a real temporary tree and check its groups.

    The tree and the expected mapping come from
    ``create_file_system_and_exep_output``. Every reported hash must be a key
    of ``expected`` and every reported path must belong to that key's set.
    """
    expected, ffs_list = create_file_system_and_exep_output(tmpdir)
    ans = findDuplicate(ffs_list)
    for digest, locations in ans.items():
        assert digest in expected
        for location in locations:
            assert location in expected[digest]

【讨论】:

    猜你喜欢
    • 2014-08-23
    • 1970-01-01
    • 1970-01-01
    • 2011-10-21
    • 2012-11-08
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多