为 boost::deque 保留块分配器？答案

【问题标题】：Preserving block-allocator for boost::deque?为 boost::deque 保留块分配器？
【发布时间】：2020-08-09 11:30:59
【问题描述】：

我想让boost::container::deque 重用已释放的块，而不是释放它们然后分配新的块。

boost::container::deque 允许的唯一配置选项是compile-time specification of the block size（就项目或字节数而言）。

确实，指定块大小是我想使用的东西，但我也想指定在空闲后将保留的块数，并在需要新块时重用。然而，如图here 所示，对于boost::container::deque，这个数字是0，所以它会在块空闲时立即释放！我想创建一个这个数字等于1的双端队列。

我看到了通过指定自定义分配器来实现这一目标的机会。想想这个丑陋的：

// Allocator that keeps at most one single-block allocation in reserve instead
// of returning it to std::allocator, and hands that block out again on the
// next single-block request. Copies of the allocator share the same reserve
// slot through the shared_ptr member.
template < typename Block >
struct PreservingAllocator : std::allocator<Block>
{
  using std::allocator<Block>::allocator;

  // Single-block requests are served from the reserve slot when it is filled;
  // everything else is forwarded to std::allocator.
  Block* allocate(size_t nn)
  {
    if (nn == 1)
    {
      Block* const cached = m_reserve->exchange(nullptr);
      if (cached != nullptr)
        return cached;
    }
    return std::allocator<Block>::allocate(nn);
  }

  // A single block is swapped into the reserve slot; whatever was parked
  // there before (possibly nothing) is what actually gets released.
  void deallocate(Block* block, size_t nn)
  {
    Block* toRelease = block;
    if (nn == 1)
      toRelease = m_reserve->exchange(block);
    if (toRelease == nullptr)
      return;
    std::allocator<Block>::deallocate(toRelease, nn);
  }

private:
  // Runs when the last copy sharing the slot goes away: releases the parked
  // block, if any, and then the slot itself.
  static constexpr auto Deleter = [](std::atomic<Block*>* pointer)
  {
    if (pointer != nullptr)
    {
      Block* const leftover = pointer->exchange(nullptr);
      if (leftover != nullptr)
        std::allocator<Block>{}.deallocate(leftover, 1);
    }
    delete pointer;
  };

  std::shared_ptr<std::atomic<Block*>> m_reserve = {new std::atomic<Block*>{nullptr}, Deleter};
};

所以问题是。

如何为boost::container::deque 指定块分配器（块，而不是items！）？
如果有办法，那么这样的规范会支持带状态的分配器吗？
如果支持，那么上面提到的分配器能胜任吗？
毕竟，如果不是这样，我怎么能创建一个不会释放至少一个已释放块并在以后需要新块时重用它的双端队列？

【问题讨论】：

如果您使用池分配器，就会得到这种行为。这正是分配器成为标准容器“扩展点”的原因。deque 无论如何都不会逐个元素地进行分配（这是该数据结构规范所规定的）。

标签： c++ boost memory-management deque allocator

【解决方案1】：

这给了我一个更多地使用分配器的借口。我选择了多态分配器——尽管这只是切线相关¹。

旁白：这种关系是，使用自定义分配器时，您经常希望将分配器传播到可识别分配器的嵌套类型。请参阅下面的“高级”

示例元素类型

// Sample element type: a plain aggregate holding two standard strings.
struct X {
    std::string key;
    std::string value;
};

这个类型简单得不能再简单了，不过它让我们稍后可以尝试让嵌套的字符串共享同一个分配器。

跟踪内存资源

让我们创建一个跟踪内存资源。这很简单，我们只转发标准的new/delete：

namespace pmr = boost::container::pmr;

struct tracing_resource : pmr::memory_resource {
    uint64_t n = 0, total_bytes = 0;

    virtual void* do_allocate(std::size_t bytes, std::size_t alignment) override {
        n += 1;
        total_bytes += bytes;
        return pmr::new_delete_resource()->allocate(bytes, alignment);
    }

    virtual void do_deallocate(void* p, std::size_t bytes, std::size_t alignment) override {
        if (p) {
            n -= 1;
            total_bytes -= bytes;
        }
        return pmr::new_delete_resource()->deallocate(p, bytes, alignment);
    }

    virtual bool do_is_equal(const memory_resource& other) const noexcept override {
        return pmr::new_delete_resource()->is_equal(other);
    }
};

我们可以检查n（分配数量）以及在整个测试代码中不同点分配的总字节数。

测试程序

让我们把它放在main 中，从我们的跟踪器开始：

tracing_resource tracer;

让我们在上面挂载一个池化资源：

// Pool resource constructed on top of the tracer.
pmr::unsynchronized_pool_resource res(&tracer);

// Prints the tracer's counters (n, total_bytes) together with the result of
// cache_buckets(res).
auto allocations = [&] {
    fmt::print("alloc: #{}, {} bytes, cached = {}\n", tracer.n, tracer.total_bytes, cache_buckets(res));
};
allocations();

这将打印出来

alloc: #0, 0 bytes, cached = {0, 0, 0, 0, 0, 0, 0, 0, 0}

程序刚启动时就是如此。

现在，让我们开始（重新）以各种模式分配一些双端队列：

// Deque whose allocator is given the pool resource `res`.
pmr::deque<X> collection(&res);
// Prints the container contents followed by the tracer counters and the
// result of cache_buckets(res).
auto report = [&] {
    fmt::print("collection = {}\nalloc: #{}, {} bytes, cached = {}\n", collection, tracer.n, tracer.total_bytes, cache_buckets(res));
};

// Three batches of sample elements (element type deduced as X).
std::vector data1 { X{"1", "eins"}, {"2", "zwei"},  {"3", "drei"}, };
std::vector data2 { X{"4", "vier"},   {"5", "fuenf"}, {"6", "sechs"}, };
std::vector data3 { X{"7", "sieben"}, {"8", "acht"},  {"9", "neun"}, };

// NOTE(review): `i` is never modified in this excerpt, so (i%2) is always 0
// and the push_front branch is taken for every element.
auto i = 0;
for (auto const& data : {data1, data2, data3}) {
    for (auto el : data) {
        (i%2)
            ? collection.push_back(el)
            : collection.push_front(el);
    }

    report();

    // Empty the container, then report again to show the counters afterwards.
    collection.clear();
    report();
}

这将在容器的不同末端附加不同的序列（注意：代码中的 i 从未被修改，因此实际上每个元素都走 push_front 分支，这与下方输出中元素逆序排列一致）。我们不会做太多修改操作，因为只有当字符串也使用池化资源时，这才会变得有趣。

现场演示

Live On Compiler Explorer

#include <boost/container/pmr/deque.hpp>
#include <boost/container/pmr/unsynchronized_pool_resource.hpp>

// debug output
#include <range/v3/all.hpp>
#include <fmt/ranges.h>
#include <fmt/ostream.h>
#include <iomanip>

namespace pmr = boost::container::pmr;

struct tracing_resource : pmr::memory_resource {
    uint64_t n = 0, total_bytes = 0;

    virtual void* do_allocate(std::size_t bytes, std::size_t alignment) override {
        n += 1;
        total_bytes += bytes;
        return pmr::new_delete_resource()->allocate(bytes, alignment);
    }

    virtual void do_deallocate(void* p, std::size_t bytes, std::size_t alignment) override {
        if (p) {
            n -= 1;
            total_bytes -= bytes;
        }
        return pmr::new_delete_resource()->deallocate(p, bytes, alignment);
    }

    virtual bool do_is_equal(const memory_resource& other) const noexcept override {
        return pmr::new_delete_resource()->is_equal(other);
    }
};

// Sample element type with a stream inserter that prints
// ("key", "value") using std::quoted for both fields.
struct X {
    std::string key;
    std::string value;

    friend std::ostream& operator<<(std::ostream& os, X const& x) {
        os << "(" << std::quoted(x.key);
        os << ", " << std::quoted(x.value) << ")";
        return os;
    }
};

// Builds a view yielding res.pool_cached_blocks(idx) for every pool index
// idx in [0, res.pool_count()). The view refers to `res`, so it must not
// outlive the resource.
auto cache_buckets(pmr::unsynchronized_pool_resource& res) {
    namespace rv = ::ranges::views;

    auto const pool_indices = rv::iota(0ull) | rv::take_exactly(res.pool_count());
    return pool_indices | rv::transform([&res](auto idx) {
        return res.pool_cached_blocks(idx);
    });
}

// Demo driver: fills and clears a pmr::deque<X> three times on top of a pool
// resource and prints the tracer's counters after each step.
int main() {
    tracing_resource tracer;
    {
        // Pool resource constructed on top of the tracer.
        pmr::unsynchronized_pool_resource res(&tracer);

        // Prints the tracer's counters plus the result of cache_buckets(res).
        auto allocations = [&] {
            fmt::print("alloc: #{}, {} bytes, cached = {}\n", tracer.n, tracer.total_bytes, cache_buckets(res));
        };
        allocations();

        {
            // Deque whose allocator is given the pool resource.
            pmr::deque<X> collection(&res);
            // Same as `allocations`, preceded by the container contents.
            auto report = [&] {
                fmt::print("collection = {}\nalloc: #{}, {} bytes, cached = {}\n", collection, tracer.n, tracer.total_bytes, cache_buckets(res));
            };

            // Three batches of sample elements (element type deduced as X).
            std::vector data1 { X{"1", "eins"}, {"2", "zwei"},  {"3", "drei"}, };
            std::vector data2 { X{"4", "vier"},   {"5", "fuenf"}, {"6", "sechs"}, };
            std::vector data3 { X{"7", "sieben"}, {"8", "acht"},  {"9", "neun"}, };
                            
            // NOTE(review): `i` is never modified in this function, so (i%2)
            // is always 0 and the push_front branch is taken every time.
            auto i = 0;
            for (auto const& data : {data1, data2, data3}) {
                for (auto el : data) {
                    (i%2)
                        ? collection.push_back(el)
                        : collection.push_front(el);
                }

                report();

                // Empty the container and report again.
                collection.clear();
                report();
            }
        }

        // `collection` has been destroyed at this point; `res` is still alive.
        allocations();
    }

    // `res` has been destroyed at this point; only the tracer remains.
    fmt::print("alloc: #{}, {} bytes\n", tracer.n, tracer.total_bytes);
}

打印

alloc: #0, 0 bytes, cached = {0, 0, 0, 0, 0, 0, 0, 0, 0}
collection = {("3", "drei"), ("2", "zwei"), ("1", "eins")}
alloc: #4, 1864 bytes, cached = {0, 0, 0, 0, 0, 1, 0, 0, 0}
collection = {}
alloc: #4, 1864 bytes, cached = {0, 0, 0, 0, 0, 2, 0, 0, 0}
collection = {("6", "sechs"), ("5", "fuenf"), ("4", "vier")}
alloc: #4, 1864 bytes, cached = {0, 0, 0, 0, 0, 2, 0, 0, 0}
collection = {}
alloc: #4, 1864 bytes, cached = {0, 0, 0, 0, 0, 2, 0, 0, 0}
collection = {("9", "neun"), ("8", "acht"), ("7", "sieben")}
alloc: #4, 1864 bytes, cached = {0, 0, 0, 0, 0, 2, 0, 0, 0}
collection = {}
alloc: #4, 1864 bytes, cached = {0, 0, 0, 0, 0, 2, 0, 0, 0}
alloc: #4, 1864 bytes, cached = {0, 0, 1, 0, 0, 3, 0, 0, 0}
alloc: #0, 0 bytes

高级

正如所承诺的，我们可以让元素类型分配器感知并显示传播：

 #include <boost/container/pmr/string.hpp>
 // ...

 // Element type that exposes allocator_type and passes the allocator it is
 // constructed with on to both of its string members.
 struct X {
     using allocator_type = pmr::polymorphic_allocator<X>;

     pmr::string key;
     pmr::string value;

     // Forwards each argument into the matching string member, handing the
     // allocator `a` to both.
     template<typename KeyArg, typename ValueArg>
     explicit X(KeyArg&& key, ValueArg&& value, allocator_type a = {})
         : key(std::forward<KeyArg>(key), a)
         , value(std::forward<ValueArg>(value), a)
     {}
 };

让我们修改测试驱动程序以 emplace 字符串文字中的元素：

std::vector data1 { std::pair{"1", "eins"}, {"2", "zwei"},  {"3", "drei"}, };
std::vector data2 { std::pair{"4", "vier"},   {"5", "fuenf"}, {"6", "sechs"}, };
std::vector data3 { std::pair{"7", "sieben"}, {"8", "acht"},  {"9", "neun"}, };
                
auto i = 0;
for (auto const& data : {data1, data2, data3}) {
    for (auto [k,v] : data) {
        (i%2)
            ? collection.emplace_back(k, v)
            : collection.emplace_front(k, v);
    }

为了更好的衡量，让我们也改变嵌套字符串值之一：

    collection.at(1).value.append(50, '*'); // thwart SSO
    report();

    collection.at(1).value = "sept";
    report();

再次演示 Live On Compiler Explorer

#include <boost/container/pmr/deque.hpp>
#include <boost/container/pmr/string.hpp>
#include <boost/container/pmr/unsynchronized_pool_resource.hpp>

// debug output
#include <range/v3/all.hpp>
#include <fmt/ranges.h>
#include <fmt/ostream.h>
#include <iomanip>

namespace pmr = boost::container::pmr;

struct tracing_resource : pmr::memory_resource {
    uint64_t n = 0, total_bytes = 0;

    virtual void* do_allocate(std::size_t bytes, std::size_t alignment) override {
        n += 1;
        total_bytes += bytes;
        return pmr::new_delete_resource()->allocate(bytes, alignment);
    }

    virtual void do_deallocate(void* p, std::size_t bytes, std::size_t alignment) override {
        if (p) {
            n -= 1;
            total_bytes -= bytes;
        }
        return pmr::new_delete_resource()->deallocate(p, bytes, alignment);
    }

    virtual bool do_is_equal(const memory_resource& other) const noexcept override {
        return pmr::new_delete_resource()->is_equal(other);
    }
};

// Element type that exposes allocator_type, passes the allocator it is
// constructed with on to both string members, and prints itself as
// ("key", "value").
struct X {
    using allocator_type = pmr::polymorphic_allocator<X>;

    pmr::string key;
    pmr::string value;

    // Forwards each argument into the matching string member, handing the
    // allocator `a` to both.
    template<typename KeyArg, typename ValueArg>
    explicit X(KeyArg&& key, ValueArg&& value, allocator_type a = {})
        : key(std::forward<KeyArg>(key), a)
        , value(std::forward<ValueArg>(value), a)
    {}

    friend std::ostream& operator<<(std::ostream& os, X const& x) {
        os << "(" << std::quoted(x.key.c_str());
        os << ", " << std::quoted(x.value.c_str()) << ")";
        return os;
    }
};

// Builds a view yielding res.pool_cached_blocks(idx) for every pool index
// idx in [0, res.pool_count()). The view refers to `res`, so it must not
// outlive the resource.
auto cache_buckets(pmr::unsynchronized_pool_resource& res) {
    namespace rv = ::ranges::views;

    auto const pool_indices = rv::iota(0ull) | rv::take_exactly(res.pool_count());
    return pool_indices | rv::transform([&res](auto idx) {
        return res.pool_cached_blocks(idx);
    });
}

// Demo driver: emplaces allocator-aware X elements into a pmr::deque, grows
// and reassigns one nested string, clears the deque, and prints the tracer's
// counters after each step.
int main() {
    tracing_resource tracer;
    {
        // Pool resource constructed on top of the tracer.
        pmr::unsynchronized_pool_resource res(&tracer);

        // Prints the tracer's counters plus the result of cache_buckets(res).
        auto allocations = [&] {
            fmt::print("alloc: #{}, {} bytes, cached = {}\n", tracer.n, tracer.total_bytes, cache_buckets(res));
        };
        allocations();

        {
            // Deque whose allocator is given the pool resource.
            pmr::deque<X> collection(&res);
            // Same as `allocations`, preceded by the container contents.
            auto report = [&] {
                fmt::print("collection = {}\nalloc: #{}, {} bytes, cached = {}\n", collection, tracer.n, tracer.total_bytes, cache_buckets(res));
            };

            // Three batches of key/value string-literal pairs.
            std::vector data1 { std::pair{"1", "eins"}, {"2", "zwei"},  {"3", "drei"}, };
            std::vector data2 { std::pair{"4", "vier"},   {"5", "fuenf"}, {"6", "sechs"}, };
            std::vector data3 { std::pair{"7", "sieben"}, {"8", "acht"},  {"9", "neun"}, };
                            
            // NOTE(review): `i` is never modified in this function, so (i%2)
            // is always 0 and the emplace_front branch is taken every time.
            auto i = 0;
            for (auto const& data : {data1, data2, data3}) {
                for (auto [k,v] : data) {
                    (i%2)
                        ? collection.emplace_back(k, v)
                        : collection.emplace_front(k, v);
                }

                report();

                // Grow the second element's value by 50 characters.
                collection.at(1).value.append(50, '*'); // thwart SSO
                report();

                // Replace it with a short value again.
                collection.at(1).value = "sept";
                report();

                // Empty the container and report again.
                collection.clear();
                report();
            }
        }

        // `collection` has been destroyed at this point; `res` is still alive.
        allocations();
    }

    // `res` has been destroyed at this point; only the tracer remains.
    fmt::print("alloc: #{}, {} bytes\n", tracer.n, tracer.total_bytes);
}

打印：

alloc: #0, 0 bytes, cached = {0, 0, 0, 0, 0, 0, 0, 0, 0}
collection = {("3", "drei"), ("2", "zwei"), ("1", "eins")}
alloc: #4, 1864 bytes, cached = {0, 0, 0, 0, 0, 1, 0, 0, 0}
collection = {("3", "drei"), ("2", "zwei**************************************************"), ("1", "eins")}
alloc: #5, 2008 bytes, cached = {0, 0, 0, 0, 0, 1, 0, 0, 0}
collection = {("3", "drei"), ("2", "sept"), ("1", "eins")}
alloc: #5, 2008 bytes, cached = {0, 0, 0, 0, 0, 1, 0, 0, 0}
collection = {}
alloc: #5, 2008 bytes, cached = {0, 0, 0, 1, 0, 2, 0, 0, 0}
collection = {("6", "sechs"), ("5", "fuenf"), ("4", "vier")}
alloc: #5, 2008 bytes, cached = {0, 0, 0, 1, 0, 2, 0, 0, 0}
collection = {("6", "sechs"), ("5", "fuenf**************************************************"), ("4", "vier")}
alloc: #5, 2008 bytes, cached = {0, 0, 0, 0, 0, 2, 0, 0, 0}
collection = {("6", "sechs"), ("5", "sept"), ("4", "vier")}
alloc: #5, 2008 bytes, cached = {0, 0, 0, 0, 0, 2, 0, 0, 0}
collection = {}
alloc: #5, 2008 bytes, cached = {0, 0, 0, 1, 0, 2, 0, 0, 0}
collection = {("9", "neun"), ("8", "acht"), ("7", "sieben")}
alloc: #5, 2008 bytes, cached = {0, 0, 0, 1, 0, 1, 0, 0, 0}
collection = {("9", "neun"), ("8", "acht**************************************************"), ("7", "sieben")}
alloc: #5, 2008 bytes, cached = {0, 0, 0, 0, 0, 1, 0, 0, 0}
collection = {("9", "neun"), ("8", "sept"), ("7", "sieben")}
alloc: #5, 2008 bytes, cached = {0, 0, 0, 0, 0, 1, 0, 0, 0}
collection = {}
alloc: #5, 2008 bytes, cached = {0, 0, 0, 1, 0, 2, 0, 0, 0}
alloc: #5, 2008 bytes, cached = {0, 0, 1, 1, 0, 3, 0, 0, 0}
alloc: #0, 0 bytes

结论

虽然我选择了多态分配器，因为它们默认支持scoped_allocator_adaptor<> 样式的传播，但以上所有内容都可以使用静态类型的分配器创建。

它确实表明，如果您使用池分配器，则双端队列行为将变为池化。

旁白：存在能够放弃清理的池分配器，这在某些情况下可能是有效的，例如无论如何，整个内存池都位于堆栈上。这是一种常见的分配优化技术，允许跳过大量的释放/销毁。

解决您列表中的更多问题：

Q. 如何为 boost::container::deque 指定块分配器（块，而不是物品！）？

A. 由于双端队列的工作方式，我认为分配器本质上总是按块被调用的。
Q. 如果有办法，那么这样的规范会支持带状态的分配器吗？

A. 如今标准库和 Boost Container 都应该支持带状态的分配器。如有疑问，Boost Container 可以为您兜底。
Q. 如果支持，那么上面提到的分配器能胜任吗？

A.我没仔细看，但你可以把它放在同一个测试台上找出来
Q. 毕竟，如果不是这样，我怎么能创建一个不会释放至少一个已释放块、并在以后需要新块时重用它的双端队列？

A. 见上文。我不确定自己是否理解“至少一个”这个限定，但我确实注意到 Boost 的 deque 实现会进行一次“私有 map”分配——大概是用于块的记账开销——它会一直保留到 deque 对象被销毁。这次分配不会在（默认）构造时发生，而是在之后才发生。

【讨论】：

在 Boost object_pool 而不是 PMR memory_resources 之上添加了一个具有非多态状态分配器的实现。见other answer

【解决方案2】：

我没能见好就收——我想证明这也可以用有状态的静态类型分配器来完成。

这建立在

Boost Container 的 basic_string 和 deque
Boost Container 的 scoped_allocator_adaptor 以在 uses_allocator-construction 上获取分配器传播
Boost Pool 的 object_pool 用于使用 segregated storage、可选块大小提示和任意 UserAllocator 的有序分配的范围池
最后，我使用自己的（惊喜！我forgot about that earlier）有状态分配器来补充object_pool¹。它是non_boost 命名空间中的分配器。

请注意，如果您不介意单例池，则可以使用 Boost 自己的 pool allocators。这显然是推荐的做法，因为我的实现并不由 Boost 维护。

注意：为了清洁，我牺牲了池化统计信息（将这些添加到分配器比装饰多态 memory_resource 更麻烦），但我猜探查器最终最清楚

Live On Compiler Explorer

#include <boost/container/deque.hpp>
#include <boost/container/string.hpp>
#include <boost/container/scoped_allocator.hpp>
#include <boost/pool/pool_alloc.hpp>

// debug output
#include <range/v3/all.hpp>
#include <fmt/ranges.h>
#include <fmt/ostream.h>
#include <iomanip>

namespace bc = boost::container;

namespace non_boost {
    // Stateful allocator that obtains memory from a boost::pool it refers to
    // by pointer. The allocator does not create or destroy the pool; it only
    // stores its address. Copies and rebound copies all point at the same
    // pool.
    // NOTE(review): the chunk size is a property of the pool, not of T, and
    // every rebound allocator shares that pool. Presumably callers must make
    // sure sizeof(T) never exceeds the pool's requested size; confirm against
    // the Boost.Pool documentation.
    template <typename T, typename UserAllocator = boost::default_user_allocator_new_delete>
    class fast_pool_allocator
    {
      public:
        typedef T value_type;
        typedef UserAllocator user_allocator;

        typedef value_type * pointer;
        typedef const value_type * const_pointer;
        typedef value_type & reference;
        typedef const value_type & const_reference;
        typedef boost::pool<UserAllocator> pool_type;
        typedef typename pool_type::size_type       size_type;
        typedef typename pool_type::difference_type difference_type;

        // Rebinding keeps the UserAllocator and only swaps the value type.
        template <typename U>
        struct rebind {
            typedef fast_pool_allocator<U, UserAllocator> other;
        };

        // Pool all requests are served from. Public, and read directly by
        // the converting constructor of other specializations.
        pool_type* _ref;
      public:
        // Binds the allocator to an existing pool (implicit conversion from
        // a pool reference is allowed).
        fast_pool_allocator(pool_type& ref) : _ref(&ref) { }

        fast_pool_allocator(fast_pool_allocator const&) = default;
        fast_pool_allocator& operator=(fast_pool_allocator const&) = default;

        // Not explicit, mimicking std::allocator [20.4.1]
        // The new allocator shares the pool of `other`.
        template <typename U>
        fast_pool_allocator(const fast_pool_allocator<U, UserAllocator> & other) : _ref(other._ref)
        { }

        // Default destructor used.
        static pointer address(reference r)                     { return &r;                                      } 
        static const_pointer address(const_reference s)         { return &s;                                      } 
        static size_type max_size()                             { return (std::numeric_limits<size_type>::max)(); } 
        void construct(const pointer ptr, const value_type & t) { new (ptr) T(t);                                 } 
        void destroy(const pointer ptr)                         { ptr->~T();                                      } 

        // Allocators compare equal exactly when they refer to the same pool.
        bool operator==(fast_pool_allocator const& rhs) const { return _ref == rhs._ref; }
        bool operator!=(fast_pool_allocator const& rhs) const { return _ref != rhs._ref; }

        // n == 1 goes through the pool's malloc(); any other n goes through
        // ordered_malloc(n). Throws std::bad_alloc (via
        // boost::throw_exception) when the pool returns a null pointer.
        pointer allocate(const size_type n)
        {
            const pointer ret = (n == 1) 
                ? static_cast<pointer>( (_ref->malloc)() ) 
                : static_cast<pointer>( _ref->ordered_malloc(n) );
            if (ret == 0)
                boost::throw_exception(std::bad_alloc());
            return ret;
        }

        // Hint overload: the hint pointer is ignored.
        pointer allocate(const size_type n, const void * const) { return allocate(n); }
        // Single-chunk allocation; throws std::bad_alloc on a null result.
        pointer allocate()
        {
            const pointer ret = static_cast<pointer>( (_ref->malloc)() );
            if (ret == 0)
                boost::throw_exception(std::bad_alloc());
            return ret;
        }
        // Mirrors allocate(n): n == 1 uses the pool's free(ptr), any other n
        // uses free(ptr, n).
        void deallocate(const pointer ptr, const size_type n)
        {
#ifdef BOOST_NO_PROPER_STL_DEALLOCATE
            // Only compiled in when the macro is defined: tolerate null
            // pointers and zero counts.
            if (ptr == 0 || n == 0)
                return;
#endif
            if (n == 1)
                (_ref->free)(ptr);
            else
                (_ref->free)(ptr, n);
        }
        // Single-chunk counterpart of allocate().
        void deallocate(const pointer ptr) { (_ref->free)(ptr); }
    };

    //Specialization of fast_pool_allocator<void> required to make the allocator standard-conforming.
    // It only provides the pointer typedefs and rebind; it has no pool
    // pointer and no allocate/deallocate members.
    template<typename UserAllocator>
    class fast_pool_allocator<void, UserAllocator> {
    public:
        typedef void*       pointer;
        typedef const void* const_pointer;
        typedef void        value_type;

        template <class U> struct rebind {
            typedef fast_pool_allocator<U, UserAllocator> other;
        };
    };
}

// Pool-backed allocator for Elem, wrapped in Boost.Container's scoped
// allocator adaptor.
template <typename Elem>
using Alloc = bc::scoped_allocator_adaptor<non_boost::fast_pool_allocator<Elem>>;

// Element type that exposes allocator_type, passes the allocator it is
// constructed with on to both string members, and prints itself as
// ("key", "value").
struct X {
    using allocator_type = Alloc<X>;

    bc::basic_string<char, std::char_traits<char>, Alloc<char> > key;
    bc::basic_string<char, std::char_traits<char>, Alloc<char> > value;

    // Forwards each argument into the matching string member, handing the
    // allocator `a` to both. The allocator has no default here.
    template<typename KeyArg, typename ValueArg>
    explicit X(KeyArg&& key, ValueArg&& value, allocator_type a)
        : key(std::forward<KeyArg>(key), a)
        , value(std::forward<ValueArg>(value), a)
    {}

    friend std::ostream& operator<<(std::ostream& os, X const& x) {
        os << "(" << std::quoted(x.key.c_str());
        os << ", " << std::quoted(x.value.c_str()) << ")";
        return os;
    }
};

// Demo driver for the statically typed, stateful allocator: same sequence of
// operations as the PMR demos, printing only the container contents.
int main() {
    // Pool constructed with sizeof(X) as its size argument
    // (presumably the chunk size; see the Boost.Pool documentation).
    boost::pool<boost::default_user_allocator_new_delete> _pool { sizeof(X) };
    // Allocator bound to that pool.
    Alloc<X> alloc { _pool };

    bc::deque<X, Alloc<X> > collection(alloc);
    // Prints the current container contents.
    auto dump = [&] { fmt::print("collection = {}\n", collection); };

    // Three batches of key/value string-literal pairs.
    std::vector data1 { std::pair{"1", "eins"}, {"2", "zwei"},  {"3", "drei"}, };
    std::vector data2 { std::pair{"4", "vier"},   {"5", "fuenf"}, {"6", "sechs"}, };
    std::vector data3 { std::pair{"7", "sieben"}, {"8", "acht"},  {"9", "neun"}, };

    // NOTE(review): `i` is never modified in this function, so (i%2) is
    // always 0 and the emplace_front branch is taken every time.
    auto i = 0;
    for (auto const& data : {data1, data2, data3}) {
        for (auto [k,v] : data) {
            (i%2)
                ? collection.emplace_back(k, v)
                : collection.emplace_front(k, v);
        }

        dump();

        // Grow the second element's value by 50 characters.
        collection.at(1).value.append(50, '*'); // thwart SSO
        dump();

        // Replace it with a short value again.
        collection.at(1).value = "sept";
        dump();

        // Empty the container and print it once more.
        collection.clear();
        dump();
    }
}

¹（参见Is there some way to use boost::obect_pool with faster free operations 和Is there a BOOST pool fixed-sized allocator?）

【讨论】：

这里证明了您可以使用现代标准库实现（即实现有状态分配器支持）来做到这一点：Live On Compiler Explorer 没有 Boost Container