【问题标题】:OpenCL memory leaks and errors even after releasing everything with clRelease*()(即使在使用 clRelease*() 释放所有内容后,OpenCL 仍然出现内存泄漏和错误)
【发布时间】:2020-08-25 19:40:54
【问题描述】:

我编写了一个非常简单的 OpenCL 程序,它只是将两个数组相加。它可以正常工作,但是如果我使用 Valgrind 运行它,即使在我使用 clRelease* 释放所有内容之后,我也会遇到很多内存泄漏。当我的源代码中有clReleaseProgram(program) 时,我也会收到Mismatched free() / delete / delete[] Valgrind 错误。需要明确的是,无论我是否有该行,我都会遇到内存泄漏,但该行也会给我错误。

我在下面附上了 main.c 和 kernel.cl 两个源代码文件,以及保留 clReleaseProgram(program) 这一行时得到的 Valgrind 输出。

我不知道这是否相关,但我使用的是 AMD 的 ROCm OpenCL 实现(OpenCL 1.2 版),程序运行在 Radeon RX 5700XT 显卡上。操作系统是 Ubuntu 20.04。

非常感谢任何建议!

// Simple test OpenCL program that adds two arrays

// Define OpenCL version
#define CL_TARGET_OPENCL_VERSION 120

#include <CL/cl.h>
#include <stdlib.h>
#include <stdio.h>


// Main program
int main(int argc, char **argv) {
    // Create all three arrays needed
    float arr1[] = {1.0, 2.0, 3.0, 4.0};
    float arr2[] = {10.0, 11.0, 12.0, 13.0};
    float arr3[4]; 
    
    // Initialize platform id structure
    cl_int err;
    cl_platform_id platform;
    err = clGetPlatformIDs(1, &platform, NULL);

    // Initialize device id structure
    cl_device_id device;
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);

    // Initialize context
    cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);

    // Determine file size of kernel source file
    FILE *fp;
    fp = fopen("kernel.cl", "r");
    fseek(fp, 0, SEEK_END);
    long int src_size = ftell(fp);
    rewind(fp);

    // Read kernel source file into buffer
    char *buf = malloc(sizeof(char) * (src_size + 1));
    fread(buf, sizeof(char), src_size, fp);
    buf[src_size] = '\0';

    // Create the program
    cl_program program = clCreateProgramWithSource(context, 1, (const char **) &buf, NULL, &err);
    free(buf);

    // Build the program
    const char options[] = "-cl-std=CL1.2 -D MULT=2";
    err = clBuildProgram(program, 1, &device, options, NULL, NULL);

    // Create command queue
    cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);

    // Create kernel
    const char kernel_name[] = "test_kernel";
    cl_kernel kernel = clCreateKernel(program, kernel_name, &err);

    // Create the memory objects
    cl_mem arr1_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, 4 * sizeof(float), NULL, &err);
    cl_mem arr2_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, 4 * sizeof(float), NULL, &err);
    cl_mem arr3_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 4 * sizeof(float), NULL, &err);

    // Write the input arrays into the input mem objects
    err = clEnqueueWriteBuffer(queue, arr1_mem, CL_TRUE, 0, 4 * sizeof(float), arr1, 0, NULL, NULL);
    err = clEnqueueWriteBuffer(queue, arr2_mem, CL_TRUE, 0, 4 * sizeof(float), arr2, 0, NULL, NULL);

    // Set the kernel arguments
    err = clSetKernelArg(kernel, 0, sizeof(arr1_mem), &arr1_mem);
    err = clSetKernelArg(kernel, 1, sizeof(arr2_mem), &arr2_mem);
    err = clSetKernelArg(kernel, 2, sizeof(arr3_mem), &arr3_mem);

    // Execute kernel
    size_t gws[] = {4};
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, gws, NULL, 0, NULL, NULL);

    // Read the output array into the output mem object
    err = clEnqueueReadBuffer(queue, arr3_mem, CL_TRUE, 0, 4 * sizeof(float), arr3, 0, NULL, NULL);

    // Print the output
    for (int i = 0; i < 4; ++i) {
        printf("%f\n", arr3[i]);
    }

    // Release all OpenCL objects
    clReleaseMemObject(arr1_mem);
    clReleaseMemObject(arr2_mem);
    clReleaseMemObject(arr3_mem);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(queue);
    clReleaseProgram(program); // This line causes Mismatched free / delete[] / delete valgrind error
    clReleaseContext(context);

    return 0;
}
// Test OpenCL kernel: writes the element-wise sum of arr1 and arr2 into arr3
__kernel void test_kernel(__constant float *arr1, __constant float *arr2, __global float *arr3) {
    // This work-item's global id in dimension 0 selects the element it handles
    const int gid = get_global_id(0);

    // Add the two inputs at that position and store the result in the output
    const float sum = arr1[gid] + arr2[gid];
    arr3[gid] = sum;
}

ivy@ubuntu-main:~/cl_test$ valgrind ./a.out
==10340== Memcheck, a memory error detector
==10340== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==10340== Using Valgrind-3.15.0 and LibVEX; rerun with -h for copyright info
==10340== Command: ./a.out
==10340== 
==10340== Warning: set address range perms: large range [0x59c9d000, 0x159e9e000) (noaccess)
11.000000
13.000000
15.000000
17.000000
==10340== Mismatched free() / delete / delete []
==10340==    at 0x483CFBF: operator delete(void*) (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==10340==    by 0x542B8C0: rocr::amd::hsa::loader::AmdHsaCodeLoader::DestroyExecutable(rocr::amd::hsa::loader::Executable*) (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==10340==    by 0x5407ECF: rocr::HSA::hsa_executable_destroy(hsa_executable_s) (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==10340==    by 0x5159C13: roc::Program::~Program() (in /opt/rocm-3.7.0/opencl/lib/libamdocl64.so)
==10340==    by 0x515A606: roc::LightningProgram::~LightningProgram() (in /opt/rocm-3.7.0/opencl/lib/libamdocl64.so)
==10340==    by 0x5106B22: amd::Program::~Program() (in /opt/rocm-3.7.0/opencl/lib/libamdocl64.so)
==10340==    by 0x5106D28: amd::Program::~Program() (in /opt/rocm-3.7.0/opencl/lib/libamdocl64.so)
==10340==    by 0x51006C5: amd::ReferenceCountedObject::release() (in /opt/rocm-3.7.0/opencl/lib/libamdocl64.so)
==10340==    by 0x50DACB3: clReleaseProgram (in /opt/rocm-3.7.0/opencl/lib/libamdocl64.so)
==10340==    by 0x1099A3: main (main.c:92)
==10340==  Address 0xe3634c0 is 0 bytes inside a block of size 42 alloc'd
==10340==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==10340==    by 0x4B0750E: strdup (strdup.c:42)
==10340==    by 0x54348A3: rocr::amd::hsa::loader::ExecutableImpl::LoadCodeObject(hsa_agent_s, hsa_code_object_s, unsigned long, char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, hsa_loaded_code_object_s*) (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==10340==    by 0x5434F52: rocr::amd::hsa::loader::ExecutableImpl::LoadCodeObject(hsa_agent_s, hsa_code_object_s, char const*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, hsa_loaded_code_object_s*) (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==10340==    by 0x5408460: rocr::HSA::hsa_executable_load_agent_code_object(hsa_executable_s, hsa_agent_s, hsa_code_object_reader_s, char const*, hsa_loaded_code_object_s*) (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==10340==    by 0x5159FBF: roc::LightningProgram::setKernels(amd::option::Options*, void*, unsigned long) (in /opt/rocm-3.7.0/opencl/lib/libamdocl64.so)
==10340==    by 0x5153358: device::Program::linkImplLC(amd::option::Options*) (in /opt/rocm-3.7.0/opencl/lib/libamdocl64.so)
==10340==    by 0x5153ABC: device::Program::build(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, char const*, amd::option::Options*) (in /opt/rocm-3.7.0/opencl/lib/libamdocl64.so)
==10340==    by 0x5108F31: amd::Program::build(std::vector<amd::Device*, std::allocator<amd::Device*> > const&, char const*, void (*)(_cl_program*, void*), void*, bool, bool) (in /opt/rocm-3.7.0/opencl/lib/libamdocl64.so)
==10340==    by 0x50DAF42: clBuildProgram (in /opt/rocm-3.7.0/opencl/lib/libamdocl64.so)
==10340==    by 0x1096B5: main (main.c:51)
==10340== 
==10340== 
==10340== HEAP SUMMARY:
==10340==     in use at exit: 991,949 bytes in 4,653 blocks
==10340==   total heap usage: 576,804 allocs, 572,151 frees, 246,308,729 bytes allocated
==10340== 
==10340== LEAK SUMMARY:
==10340==    definitely lost: 67,584 bytes in 1 blocks
==10340==    indirectly lost: 0 bytes in 0 blocks
==10340==      possibly lost: 7,692 bytes in 66 blocks
==10340==    still reachable: 916,673 bytes in 4,586 blocks
==10340==         suppressed: 0 bytes in 0 blocks
==10340== Rerun with --leak-check=full to see details of leaked memory
==10340== 
==10340== For lists of detected and suppressed errors, rerun with: -s
==10340== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)

编辑:以下是运行 valgrind --leak-check=full ./a.out 时的输出。我省略了 Valgrind 输出的开头,因为它与上面完全相同。我还省略了中间的大部分输出,因为所有错误看起来都基本相同,而且数量太多,无法全部放进这篇帖子。

... Same output here as above ...


==4996== HEAP SUMMARY:
==4996==     in use at exit: 991,947 bytes in 4,653 blocks
==4996==   total heap usage: 575,705 allocs, 571,052 frees, 246,015,102 bytes allocated
==4996== 
==4996== 4 bytes in 1 blocks are possibly lost in loss record 7 of 2,212
==4996==    at 0x483B723: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==4996==    by 0x483E017: realloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==4996==    by 0x5B27783: _fmm_map_to_gpu.isra.8 (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x5B28B57: fmm_map_to_gpu (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x5B2950F: fmm_init_process_apertures (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x5B2CD2B: hsaKmtOpenKFD (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x5401368: rocr::AMD::Load() (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x542440D: rocr::core::Runtime::Acquire() (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x54027C9: rocr::HSA::hsa_init() (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x5135C84: roc::Device::init() (in /opt/rocm-3.7.0/opencl/lib/libamdocl64.so)
==4996==    by 0x50F832E: amd::Device::init() (in /opt/rocm-3.7.0/opencl/lib/libamdocl64.so)
==4996==    by 0x5102525: amd::Runtime::init() (in /opt/rocm-3.7.0/opencl/lib/libamdocl64.so)
==4996== 
==4996== 4 bytes in 1 blocks are possibly lost in loss record 8 of 2,212
==4996==    at 0x483B723: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==4996==    by 0x483E017: realloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==4996==    by 0x5B27783: _fmm_map_to_gpu.isra.8 (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x5B28B57: fmm_map_to_gpu (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x5B2C520: hsaKmtMapMemoryToGPU (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x5B2F29B: allocate_exec_aligned_memory_gpu (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x5B25679: hsaKmtCreateEvent (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x5414E62: rocr::core::InterruptSignal::CreateEvent(_HSA_EVENTTYPE, bool) (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x541FD00: rocr::core::Runtime::BindVmFaultHandler() (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x5424427: rocr::core::Runtime::Acquire() (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x54027C9: rocr::HSA::hsa_init() (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x5135C84: roc::Device::init() (in /opt/rocm-3.7.0/opencl/lib/libamdocl64.so)


... Lots of similar repeating errors here ...


==4996== 720 bytes in 3 blocks are possibly lost in loss record 2,165 of 2,212
==4996==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==4996==    by 0x5B262DF: vm_create_and_init_object (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x5B265EC: fmm_allocate_memory_object (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x5B28219: fmm_allocate_host (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x5B2B9FA: hsaKmtAllocMemory (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x53F0E31: rocr::AMD::MemoryRegion::Allocate(unsigned long&, unsigned int, void**) const (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x541C4C2: std::_Function_handler<void* (unsigned long, unsigned long, unsigned int), rocr::core::Runtime::RegisterAgent(rocr::core::Agent*)::{lambda(unsigned long, unsigned long, unsigned int)#1}>::_M_invoke(std::_Any_data const&, unsigned long&&, std::_Any_data const&, unsigned int&&) (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x53DF22E: rocr::AMD::GpuAgent::AssembleShader(char const*, rocr::AMD::GpuAgent::AssembleTarget, void*&, unsigned long&) const (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x53D39B2: rocr::AMD::BlitKernel::Initialize(rocr::core::Agent const&) (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x53DD58A: std::_Function_handler<rocr::core::Blit* (), rocr::AMD::GpuAgent::InitDma()::{lambda()#3}>::_M_invoke(std::_Any_data const&) (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x53DD2F9: rocr::AMD::GpuAgent::DmaCopy(void*, void const*, unsigned long) (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x53ED80A: rocr::(anonymous namespace)::RegionMemory::Freeze() (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996== 
==4996== 1,200 bytes in 5 blocks are possibly lost in loss record 2,173 of 2,212
==4996==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==4996==    by 0x5B262DF: vm_create_and_init_object (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x5B265EC: fmm_allocate_memory_object (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x5B28219: fmm_allocate_host (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x5B2B9FA: hsaKmtAllocMemory (in /opt/rocm-3.7.0/lib/libhsakmt.so.1.0.30700)
==4996==    by 0x53F0E31: rocr::AMD::MemoryRegion::Allocate(unsigned long&, unsigned int, void**) const (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x541C4C2: std::_Function_handler<void* (unsigned long, unsigned long, unsigned int), rocr::core::Runtime::RegisterAgent(rocr::core::Agent*)::{lambda(unsigned long, unsigned long, unsigned int)#1}>::_M_invoke(std::_Any_data const&, unsigned long&&, std::_Any_data const&, unsigned int&&) (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x542B8D5: rocr::core::Shared<rocr::core::SharedSignal, rocr::core::SharedSignalPool_t>::Shared(rocr::core::SharedSignalPool_t*, int) (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x542921E: rocr::core::LocalSignal::LocalSignal(long, bool) (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x5414EE8: rocr::core::InterruptSignal::InterruptSignal(long, _HsaEvent*) (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x5410E85: rocr::AMD::hsa_amd_signal_create(long, unsigned int, hsa_agent_s const*, unsigned long, hsa_signal_s*) (in /opt/rocm-3.7.0/lib/libhsa-runtime64.so.1.2.30700)
==4996==    by 0x513B727: roc::VirtualGPU::initPool(unsigned long, unsigned int) (in /opt/rocm-3.7.0/opencl/lib/libamdocl64.so)
==4996== 
==4996== 67,584 bytes in 1 blocks are definitely lost in loss record 2,209 of 2,212
==4996==    at 0x483B7F3: malloc (in /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
==4996==    by 0xA977E83: RegisterHandlers() (in /opt/rocm-3.7.0/lib/libamd_comgr.so.1.6.30700)
==4996==    by 0xA9787EB: llvm::sys::RemoveFileOnSignal(llvm::StringRef, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) (in /opt/rocm-3.7.0/lib/libamd_comgr.so.1.6.30700)
==4996==    by 0x8F2BC0E: clang::CompilerInstance::createOutputFile(llvm::StringRef, std::error_code&, bool, bool, llvm::StringRef, llvm::StringRef, bool, bool, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >*) (in /opt/rocm-3.7.0/lib/libamd_comgr.so.1.6.30700)
==4996==    by 0x8F2BFD2: clang::CompilerInstance::createOutputFile(llvm::StringRef, bool, bool, llvm::StringRef, llvm::StringRef, bool, bool) (in /opt/rocm-3.7.0/lib/libamd_comgr.so.1.6.30700)
==4996==    by 0x8F2C39E: clang::CompilerInstance::createDefaultOutputFile(bool, llvm::StringRef, llvm::StringRef) (in /opt/rocm-3.7.0/lib/libamd_comgr.so.1.6.30700)
==4996==    by 0x77F3913: GetOutputStream(clang::CompilerInstance&, llvm::StringRef, clang::BackendAction) (in /opt/rocm-3.7.0/lib/libamd_comgr.so.1.6.30700)
==4996==    by 0x77F959A: clang::CodeGenAction::CreateASTConsumer(clang::CompilerInstance&, llvm::StringRef) (in /opt/rocm-3.7.0/lib/libamd_comgr.so.1.6.30700)
==4996==    by 0x8F6D175: clang::FrontendAction::CreateWrappedASTConsumer(clang::CompilerInstance&, llvm::StringRef) (in /opt/rocm-3.7.0/lib/libamd_comgr.so.1.6.30700)
==4996==    by 0x8F70865: clang::FrontendAction::BeginSourceFile(clang::CompilerInstance&, clang::FrontendInputFile const&) (in /opt/rocm-3.7.0/lib/libamd_comgr.so.1.6.30700)
==4996==    by 0x8F2F079: clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (in /opt/rocm-3.7.0/lib/libamd_comgr.so.1.6.30700)
==4996==    by 0x743641A: clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (in /opt/rocm-3.7.0/lib/libamd_comgr.so.1.6.30700)
==4996== 
==4996== LEAK SUMMARY:
==4996==    definitely lost: 67,584 bytes in 1 blocks
==4996==    indirectly lost: 0 bytes in 0 blocks
==4996==      possibly lost: 7,692 bytes in 66 blocks
==4996==    still reachable: 916,671 bytes in 4,586 blocks
==4996==         suppressed: 0 bytes in 0 blocks
==4996== Reachable blocks (those to which a pointer was found) are not shown.
==4996== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==4996== 
==4996== For lists of detected and suppressed errors, rerun with: -s
==4996== ERROR SUMMARY: 56 errors from 56 contexts (suppressed: 0 from 0)

【问题讨论】:

  • 听起来像是你应该针对 rocm 项目报告的错误。
  • @pmdj 应该在这里(原帖中的链接 here)报告吗?我可以直接链接这个 Stack Overflow 问题,还是应该在那里从头重新描述一遍?我不想惹恼开发者,哈哈。
  • 看起来这是正确的地方,注意还有其他一些关于 valgrind 错误的报告。确保您检查是否有人报告了您遇到的特定错误,并查看您是否可以进一步减少代码示例,并且在发布之前仍然会收到错误。我会在那里发布重现代码,我认为链接到 Stack Overflow 可能对任何人都没有用。
  • @pmdj 好的,我会这样做的。这段代码实际上已经是我能想出的最简单的例子来重现我的主要高斯模糊程序中的错误。感谢您的所有帮助!

标签: c memory-leaks opencl valgrind


【解决方案1】:

如果您使用的是“标准”libstdc++ 和 libc,那么在您的情况下,不匹配的 strdup / delete(而不是 strdup / free)应该不会造成问题。这是因为 libstdc++ new 只是调用了malloc,而delete 调用了free。另一方面,如果您使用了一个替换的delete,它不是基于free,那么您的应用程序很可能会严重崩溃。

对于内存泄漏,您需要使用 --leak-check=full 选项运行 memcheck,正如 Valgrind 输出末尾附近所提示的那样。

【讨论】:

  • 非常感谢您的回复。分配和释放都是由 ROCm 代码处理的,所以如果我理解正确,只要他们代码中使用的 new / delete 底层调用的是 malloc / free,就应该没有问题?我的程序产生了正确的输出,所以我认为这意味着它没有“严重崩溃”?我还添加了 valgrind --leak-check=full 的输出,但您可以看到所有泄漏都源自 ROCm 代码,所以我不确定我这边还应该做些什么。有什么建议吗?
猜你喜欢
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 2012-06-08
  • 1970-01-01
  • 1970-01-01
  • 2011-12-17
  • 1970-01-01
相关资源
最近更新 更多