我的 C++ 代码没有正确检测对象 yolov5答案

【问题标题】：My C++ code is not detecting objects correctly yolov5我的 C++ 代码没有正确检测对象 yolov5
【发布时间】：2022-11-26 13:59:20
【问题描述】：

我有一个 yolov5 onnx 文件，我在其中训练了苹果和香蕉。直到今天我一直在使用 python，但我决定切换到 c++ 以获得一些速度。当我在下面添加的代码中使用 yolov5 自己的 onnx 文件和图像时，我得到了正确的结果。但是当我添加我自己的 onnx 文件和我的测试图像时，它给了我错误的结果。您还可以找到附加的图像。这里有什么问题？


// Include Libraries.
\#include \<opencv2/opencv.hpp\>
\#include \<fstream\>

// Namespaces.
using namespace cv;
using namespace std;
using namespace cv::dnn;

// Constants.
const float INPUT_WIDTH = 640.0;
const float INPUT_HEIGHT = 640.0;
const float SCORE_THRESHOLD = 0.3;
const float NMS_THRESHOLD = 0.4;
const float CONFIDENCE_THRESHOLD = 0.65;

// Text parameters.
const float FONT_SCALE = 0.7;
const int FONT_FACE = FONT_HERSHEY_SIMPLEX;
const int THICKNESS = 1;

// Colors.
Scalar BLACK = Scalar(0,0,0);
Scalar BLUE = Scalar(255, 178, 50);
Scalar YELLOW = Scalar(0, 255, 255);
Scalar RED = Scalar(0,0,255);

// Draw the predicted bounding box.
void draw_label(Mat& input_image, string label, int left, int top)
{
// Display the label at the top of the bounding box.
int baseLine;
Size label_size = getTextSize(label, FONT_FACE, FONT_SCALE, THICKNESS, &baseLine);
top = max(top, label_size.height);
// Top left corner.
Point tlc = Point(left, top);
// Bottom right corner.
Point brc = Point(left + label_size.width, top + label_size.height + baseLine);
// Draw black rectangle.
rectangle(input_image, tlc, brc, BLACK, FILLED);
// Put the label on the black rectangle.
putText(input_image, label, Point(left, top + label_size.height), FONT_FACE, FONT_SCALE, YELLOW, THICKNESS);
}

vector\<Mat\> pre_process(Mat &input_image, Net &net)
{
// Convert to blob.
Mat blob;
blobFromImage(input_image, blob, 1./255., Size(INPUT_WIDTH, INPUT_HEIGHT), Scalar(), true, false);

    net.setInput(blob);
    
    // Forward propagate.
    vector<Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());
    
    return outputs;

}

Mat post_process(Mat &input_image, vector\<Mat\> &outputs, const vector\<string\> &class_name)
{
// Initialize vectors to hold respective outputs while unwrapping detections.
vector\<int\> class_ids;
vector\<float\> confidences;
vector\<Rect\> boxes;

    // Resizing factor.
    float x_factor = input_image.cols / INPUT_WIDTH;
    float y_factor = input_image.rows / INPUT_HEIGHT;
    
    float *data = (float *)outputs[0].data;
    
    const int dimensions = 85;
    const int rows = 25200;
    // Iterate through 25200 detections.
    for (int i = 0; i < rows; ++i) 
    {
        float confidence = data[4];
        // Discard bad detections and continue.
        if (confidence >= CONFIDENCE_THRESHOLD) 
        {
            float * classes_scores = data + 5;
            // Create a 1x85 Mat and store class scores of 80 classes.
            Mat scores(1, class_name.size(), CV_32FC1, classes_scores);
            // Perform minMaxLoc and acquire index of best class score.
            Point class_id;
            double max_class_score;
            minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
            // Continue if the class score is above the threshold.
            if (max_class_score > SCORE_THRESHOLD) 
            {
                // Store class ID and confidence in the pre-defined respective vectors.
    
                confidences.push_back(confidence);
                class_ids.push_back(class_id.x);
    
                // Center.
                float cx = data[0];
                float cy = data[1];
                // Box dimension.
                float w = data[2];
                float h = data[3];
                // Bounding box coordinates.
                int left = int((cx - 0.5 * w) * x_factor);
                int top = int((cy - 0.5 * h) * y_factor);
                int width = int(w * x_factor);
                int height = int(h * y_factor);
                // Store good detections in the boxes vector.
                boxes.push_back(Rect(left, top, width, height));
            }
    
        }
        // Jump to the next column.
        data += 85;
    
        
    }
    
    // Perform Non Maximum Suppression and draw predictions.
    vector<int> indices;
    NMSBoxes(boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD, indices);
    for (int i = 0; i < indices.size(); i++) 
    {
        int idx = indices[i];
        Rect box = boxes[idx];
    
        int left = box.x;
        int top = box.y;
        int width = box.width;
        int height = box.height;
        // Draw bounding box.
        rectangle(input_image, Point(left, top), Point(left + width, top + height), BLUE, 3*THICKNESS);
    
        // Get the label for the class name and its confidence.
        string label = format("%.2f", confidences[idx]);
        label = class_name[class_ids[idx]] + ":" + label;
        // Draw class labels.
        draw_label(input_image, label, left, top);
        //cout<<"The Value is "<<label;
        //cout<<endl;
        
    }
    
    return input_image;

}

int main()
{

    vector<string> class_list;
    ifstream ifs("/Users/admin/Documents/C++/First/obj.names");
    string line;
    
    while (getline(ifs, line))
    {
        class_list.push_back(line);
    }
    
    // Load image.
    Mat frame;
    frame = imread("/Users/admin/Documents/C++/First/test.jpg");
    
    // Load model.
    Net net;
    net = readNet("/Users/admin/Documents/C++/First/my.onnx"); 
    
    vector<Mat> detections;
    detections = pre_process(frame, net);
    
    Mat img = post_process(frame, detections, class_list);
    
    //Mat img = post_process(frame.clone(), detections, class_list);
    
    // Put efficiency information.
    // The function getPerfProfile returns the overall time for inference(t) and the timings for each of the layers(in layersTimes)
    
    vector<double> layersTimes;
    double freq = getTickFrequency() / 1000;
    double t = net.getPerfProfile(layersTimes) / freq;
    string label = format("Inference time : %.2f ms", t);
    putText(img, label, Point(20, 40), FONT_FACE, FONT_SCALE, RED);
    
    imshow("Output", img);
    waitKey(0);
    
    return 0;

}

我使用的照片是 640x480。我试了一下照片的大小，认为这可能是相关的，但同样的问题仍然存在。

【问题讨论】：

标签： c++ opencv machine-learning yolov5

【解决方案1】：

Yolov5 输出格式为 xyxy，如下所示：

https://github.com/ultralytics/yolov5/blob/bfa1f23045c7c4136a9b8ced9d6be8249ed72692/detect.py#L161

不是您在代码中假设的 xywh

【讨论】：