Video lagging during object detection in C++

There is a pre-trained object detection model, i.e. YOLOv3/v4-tiny. When the algorithm is implemented in Python, everything looks good: there is no lag while processing the video when displaying it with "imshow".

import cv2
import numpy as np
import time

# Load the YOLO network. yolov4-tiny trades accuracy for speed, which keeps
# the per-frame forward pass fast enough for live video.
# net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")  # Original yolov3
net = cv2.dnn.readNet("yolov4-tiny.weights", "yolov4-tiny.cfg")  # Tiny YOLO

# COCO class labels, one per line.
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

layer_names = net.getLayerNames()
# getUnconnectedOutLayers() returns 1-based layer ids, hence the "- 1".
outputlayers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
print(outputlayers)

# One random BGR color per class so detections are visually distinguishable.
colors = np.random.uniform(0, 255, size=(len(classes), 3))

cap = cv2.VideoCapture(0)  # 0 selects the first webcam
cap.set(cv2.CAP_PROP_FPS, 30)
cap.set(cv2.CAP_PROP_BUFFERSIZE, 2)  # short grab queue -> fresher frames, less latency
font = cv2.FONT_HERSHEY_PLAIN

frame_id = 0

while True:
    starting_time = time.time()
    ok, frame = cap.read()
    if not ok:  # camera unplugged / stream ended — avoid crashing on frame.shape
        break
    frame_id += 1

    height, width, channels = frame.shape

    # Detect objects. YOLO expects pixels scaled to [0, 1], so the scale factor
    # is 1/255.0 (this also matches the C++ implementation); swapRB converts
    # OpenCV's BGR ordering to the RGB the model was trained on.
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (512, 512), (0, 0, 0),
                                 True, crop=False)
    net.setInput(blob)
    outs = net.forward(outputlayers)

    # Collect every candidate detection above the confidence threshold.
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            # detection = [cx, cy, w, h, objectness, class scores...]
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.4:
                # Box coordinates are normalized; scale back to pixel space.
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                # Convert center-based box to top-left corner for drawing.
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Non-maximum suppression removes overlapping boxes for the same object.
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.4, 0.6)

    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = confidences[i]
            color = colors[class_ids[i]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(frame, label + "(" + str(round(confidence * 100)) + "%)",
                        (x, y - 1), font, 1, color, 2)

    # starting_time is reset each iteration, so elapsed_time is the per-frame
    # cost and instantaneous FPS is its reciprocal (the old frame_id/elapsed_time
    # grew without bound).
    elapsed_time = time.time() - starting_time
    print(elapsed_time)
    fps = 1.0 / elapsed_time if elapsed_time > 0 else 0.0
    cv2.putText(frame, "FPS:" + str(round(fps, 2)), (10, 50), font, 2, (0, 0, 0), 1)
    cv2.imshow("Image", frame)  # must be shown, otherwise waitKey polls a dead window
    key = cv2.waitKey(1)  # 1 ms delay, then process the next frame

    if key == 27:  # Esc key stops the process
        break

cap.release()
cv2.destroyAllWindows()

But when I implemented the same in C++, the execution time increased and there is a huge lag in the video.

#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/core/ocl.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#include <chrono>
#include <time.h>
using namespace std;
using namespace cv;
using namespace dnn;
using namespace chrono;
int main()
{
    vector<string> classes;
    string classesFile = "F:/coco.names";
    Net net = readNet("F:/yolov4-tiny.weights","F:/yolov4-tiny.cfg");
    ifstream ifs(classesFile.c_str());
    string line;
    while(getline(ifs,line)) classes.push_back(line);

    vector<string> layer_names = net.getLayerNames();
    vector<string> outputlayers;
    vector<int> unconnectedlayer = net.getUnconnectedOutLayers();
    for(int i=0;i<unconnectedlayer.size();i++)
        outputlayers.push_back(layer_names[unconnectedlayer[i]-1]);
    //check till above
    VideoCapture cap(0);
    cap.set(CAP_PROP_FPS,30);
    cap.set(CAP_PROP_BUFFERSIZE,2);
    Mat frame, blob;
    time_t start, end;
    while(1){
        time(&start);
        cap.read(frame);
        blobFromImage(frame, blob, 1/255.0,Size(512,512),Scalar(0,0,0),true,false);
        net.setInput(blob);
        vector<Mat> outs;
        net.forward(outs, outputlayers);
        int framewidth=frame.cols;
        int frameheight=frame.rows;
        vector<int> class_ids;
        vector<float> confidences;
        vector<Rect> boxes;

        for (size_t i = 0; i < outs.size(); ++i)
        {
        float* data = (float*)outs[i].data;
        for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
        {
            Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
            Point classIdPoint;
            double confidence;
            minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
                if (confidence > 0.4)
                {
                    int centerX = (int)(data[0] * framewidth);
                    int centerY = (int)(data[1] * frameheight);
                    int width = (int)(data[2] * framewidth);
                    int height = (int)(data[3] * frameheight);
                    int left = centerX - width / 2;
                    int top = centerY - height / 2;
                    class_ids.push_back(classIdPoint.x);
                    confidences.push_back((float)confidence);
                    boxes.push_back(Rect(left,top,width,height));
                }
        }
       }
       vector<int> indices;
       NMSBoxes(boxes, confidences, 0.4F, 0.6F, indices);
           for (size_t i = 0; i < indices.size(); ++i)
           {
               int idx = indices[i];
               Rect box = boxes[idx];
//               float confidence=confidences[idx];
               rectangle(frame,box,Scalar(255,0,0),2);
               putText(frame,classes[class_ids[i]],Point(box.x,box.y-1),FONT_HERSHEY_PLAIN,1,Scalar(255,0,0),2);
           }
        time(&end);
        double timetaken = (double)end - (double)start ;
        cout<<fixed<<timetaken<<endl;
        imshow("Image",frame);
        int c = waitKey(27);
        if((char)c == 'c')
                break;
}
    cap.release();
    destroyAllWindows();
}

So then I moved ahead and, on further debugging, I found out that the Python application is using GPU 3D and the C++ application is using GPU video processing. Now I'm clueless about how these two things work differently when the code is almost the same.

I'm using Qt Creator for this. Here is the .pro file:

QT -= gui

CONFIG += c++17 console
CONFIG -= app_bundle

# You can make your code fail to compile if it uses deprecated APIs.
# In order to do so, uncomment the following line.
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0

SOURCES += \
        main.cpp

# Default rules for deployment.
qnx: target.path = /tmp/$${TARGET}/bin
else: unix:!android: target.path = /opt/$${TARGET}/bin
!isEmpty(target.path): INSTALLS += target

debug {
 LIBS+= -LC:\opencv_build\install\x64\vc17\lib -lopencv_core455d -lopencv_dnn455d -lopencv_imgproc455d -lopencv_highgui455d -lopencv_imgcodecs455d -lopencv_videoio455d -lopencv_video455d -lopencv_calib3d455d -lopencv_photo455d -lopencv_features2d455d
 INCLUDEPATH += C:\opencv_build\install\include
 DEPENDPATH += C:\opencv_build\install\include
}

release {
  LIBS += -LC:\opencv_build\install\x64\vc17\lib -lopencv_core455 -lopencv_dnn455 -lopencv_imgproc455 -lopencv_highgui455 -lopencv_imgcodecs455 -lopencv_videoio455 -lopencv_video455 -lopencv_calib3d455 -lopencv_photo455 -lopencv_features2d455
  INCLUDEPATH += C:\opencv_build\install\include
  DEPENDPATH += C:\opencv_build\install\include
}


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source