Video lagging during object detection in C++
I have a pre-trained object detection model (YOLOv3 / YOLOv4-tiny). When the algorithm is implemented in Python, everything looks good: there is no lag while processing the video and displaying it with "imshow".
import cv2
import numpy as np
import time
# Webcam object detection with a Darknet YOLO model through OpenCV's dnn module.
# For every captured frame: run the network, keep detections scoring above 0.4,
# apply non-maximum suppression, draw the surviving boxes, and show the result.

# Swap in "yolov3.weights" / "yolov3.cfg" to run the original YOLOv3 instead.
net = cv2.dnn.readNet("yolov4-tiny.weights", "yolov4-tiny.cfg")  # Tiny YOLO

# One label per line; looked up below with the class id of the best score.
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

# Ask OpenCV for the output layer names directly rather than indexing
# getLayerNames() by hand, whose index layout differs between OpenCV versions.
outputlayers = list(net.getUnconnectedOutLayersNames())
print(outputlayers)

# One random colour per class so that each label is drawn consistently.
colors = np.random.uniform(0, 255, size=(len(classes), 3))

cap = cv2.VideoCapture(0)  # 0 for 1st webcam
if not cap.isOpened():
    raise RuntimeError("Could not open webcam 0")
cap.set(cv2.CAP_PROP_FPS, 30)
cap.set(cv2.CAP_PROP_BUFFERSIZE, 2)

font = cv2.FONT_HERSHEY_PLAIN
frame_id = 0
# Reference point for the average FPS; the per-frame timer is reset in the loop.
loop_start = time.time()

try:
    while True:
        starting_time = time.time()
        ok, frame = cap.read()
        if not ok or frame is None:
            # Camera unplugged or stream ended: stop instead of crashing below.
            break
        frame_id += 1
        height, width, channels = frame.shape

        # Detecting objects: 1/255 maps 8-bit pixels to [0, 1]; the frame is
        # resized to 512x512 with R and B swapped and no cropping.
        blob = cv2.dnn.blobFromImage(
            frame, 1 / 255.0, (512, 512), (0, 0, 0), True, crop=False
        )
        net.setInput(blob)
        outs = net.forward(outputlayers)

        # Collect every candidate whose best class score passes the threshold.
        class_ids = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                # Layout used here: [cx, cy, w, h, <skipped>, class scores...]
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.4:
                    # Box values are relative to the frame; convert to pixels.
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)
                    # Top-left corner from the centre point.
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Non-maximum suppression (score threshold 0.4, NMS threshold 0.6).
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.4, 0.6)
        # The result may be empty, flat, or Nx1 depending on the OpenCV
        # version; flattening handles all three and avoids an O(n^2) scan.
        for i in np.array(indexes, dtype=int).flatten():
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = confidences[i]
            color = colors[class_ids[i]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            cv2.putText(
                frame,
                label + "(" + str(round(confidence * 100)) + "%)",
                (x, y - 1),
                font,
                1,
                color,
                2,
            )

        # Per-frame processing time (capture + inference + drawing).
        elapsed_time = time.time() - starting_time
        print(elapsed_time)
        # Average FPS since the loop started. Dividing the running frame count
        # by a single frame's duration would grow without bound.
        fps = frame_id / (time.time() - loop_start)
        cv2.putText(
            frame, "FPS:" + str(round(fps, 2)), (10, 50), font, 2, (0, 0, 0), 1
        )

        # A window must exist for waitKey() to receive key presses.
        cv2.imshow("Image", frame)
        key = cv2.waitKey(1)  # wait 1 ms, then process the next frame
        if key == 27:  # esc key stops the process
            break
finally:
    # Always give the camera and any windows back, even after an exception.
    cap.release()
    cv2.destroyAllWindows()
But when I implemented the same algorithm in C++, the execution time increased and there is a huge lag in the video.
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <opencv2/core.hpp>
#include <opencv2/core/ocl.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>
#include <chrono>
#include <time.h>
using namespace std;
using namespace cv;
using namespace dnn;
using namespace chrono;
int main()
{
vector<string> classes;
string classesFile = "F:/coco.names";
Net net = readNet("F:/yolov4-tiny.weights","F:/yolov4-tiny.cfg");
ifstream ifs(classesFile.c_str());
string line;
while(getline(ifs,line)) classes.push_back(line);
vector<string> layer_names = net.getLayerNames();
vector<string> outputlayers;
vector<int> unconnectedlayer = net.getUnconnectedOutLayers();
for(int i=0;i<unconnectedlayer.size();i++)
outputlayers.push_back(layer_names[unconnectedlayer[i]-1]);
//check till above
VideoCapture cap(0);
cap.set(CAP_PROP_FPS,30);
cap.set(CAP_PROP_BUFFERSIZE,2);
Mat frame, blob;
time_t start, end;
while(1){
time(&start);
cap.read(frame);
blobFromImage(frame, blob, 1/255.0,Size(512,512),Scalar(0,0,0),true,false);
net.setInput(blob);
vector<Mat> outs;
net.forward(outs, outputlayers);
int framewidth=frame.cols;
int frameheight=frame.rows;
vector<int> class_ids;
vector<float> confidences;
vector<Rect> boxes;
for (size_t i = 0; i < outs.size(); ++i)
{
float* data = (float*)outs[i].data;
for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
{
Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
Point classIdPoint;
double confidence;
minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
if (confidence > 0.4)
{
int centerX = (int)(data[0] * framewidth);
int centerY = (int)(data[1] * frameheight);
int width = (int)(data[2] * framewidth);
int height = (int)(data[3] * frameheight);
int left = centerX - width / 2;
int top = centerY - height / 2;
class_ids.push_back(classIdPoint.x);
confidences.push_back((float)confidence);
boxes.push_back(Rect(left,top,width,height));
}
}
}
vector<int> indices;
NMSBoxes(boxes, confidences, 0.4F, 0.6F, indices);
for (size_t i = 0; i < indices.size(); ++i)
{
int idx = indices[i];
Rect box = boxes[idx];
// float confidence=confidences[idx];
rectangle(frame,box,Scalar(255,0,0),2);
putText(frame,classes[class_ids[i]],Point(box.x,box.y-1),FONT_HERSHEY_PLAIN,1,Scalar(255,0,0),2);
}
time(&end);
double timetaken = (double)end - (double)start ;
cout<<fixed<<timetaken<<endl;
imshow("Image",frame);
int c = waitKey(27);
if((char)c == 'c')
break;
}
cap.release();
destroyAllWindows();
}
On further debugging, I found that the Python application is using GPU-3D while the C++ application is using GPU-Video Processing. I'm clueless about why the two behave differently when the code is almost the same.
I'm using Qt Creator for this. Here is the .pro file:
# qmake project for the console OpenCV/YOLO detector (main.cpp).
QT -= gui

CONFIG += c++17 console
CONFIG -= app_bundle

# You can make your code fail to compile if it uses deprecated APIs.
# In order to do so, uncomment the following line.
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000    # disables all the APIs deprecated before Qt 6.0.0

SOURCES += \
        main.cpp

# Default rules for deployment.
qnx: target.path = /tmp/$${TARGET}/bin
else: unix:!android: target.path = /opt/$${TARGET}/bin
!isEmpty(target.path): INSTALLS += target

# The OpenCV headers are the same for both build configurations.
INCLUDEPATH += C:\opencv_build\install\include
DEPENDPATH += C:\opencv_build\install\include

# CONFIG may contain both "debug" and "release" at the same time, so plain
# `debug { }` / `release { }` scopes can both match and pull in both library
# sets. CONFIG(debug, debug|release) tests which of the two is actually active.
CONFIG(debug, debug|release) {
    # Debug OpenCV libraries ("d" suffix).
    LIBS += -LC:\opencv_build\install\x64\vc17\lib -lopencv_core455d -lopencv_dnn455d -lopencv_imgproc455d -lopencv_highgui455d -lopencv_imgcodecs455d -lopencv_videoio455d -lopencv_video455d -lopencv_calib3d455d -lopencv_photo455d -lopencv_features2d455d
} else {
    # Release OpenCV libraries.
    LIBS += -LC:\opencv_build\install\x64\vc17\lib -lopencv_core455 -lopencv_dnn455 -lopencv_imgproc455 -lopencv_highgui455 -lopencv_imgcodecs455 -lopencv_videoio455 -lopencv_video455 -lopencv_calib3d455 -lopencv_photo455 -lopencv_features2d455
}
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|