生活随笔
收集整理的這篇文章主要介紹了
两百行C++代码实现yolov5车辆计数部署(通俗易懂版)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
本文是文章傳統圖像處理方法實現車輛計數的后續。這里用OpenCV實現了基于yolov5檢測器的單向車輛計數功能,方法是撞線計數。該代碼只能演示視頻demo效果,一些功能未完善,離實際工程應用還有距離。
實現流程:
(1)訓練yolov5模型,這里就沒有自己訓練了,直接使用官方的開源模型yolov5s.pt;
(2)運行yolov5工程下面的export.py,將pt模型轉成onnx模型;
(3)編寫yolov5部署的C++工程,包括前處理、推理和后處理部分;
(4)讀取視頻第一幀,用yolov5檢測第一幀圖像的車輛目標,計算這些檢測框的中心點,
(5)讀取視頻的后續幀,用yolov5檢測每幀圖像上的車輛目標,計算新目標和上一幀圖像中檢測框中心點的距離矩陣;
(6)通過距離矩陣確定新舊目標檢測框之間的對應關系;
(7)計算對應新舊目標檢測框中心點之間的連線,判斷和事先設置的虛擬撞線是否相交,若相交則計數加1;
(8)重復(5)-(7)。
由于程序在CPU端運行,為了提速,實際實現的時候采取的是隔幀判斷而不是使用相鄰幀,v1的代碼實現如下:
#include <iostream>
#include <fstream>
#include <opencv2/opencv.hpp>
const float INPUT_WIDTH
= 640.0;
const float INPUT_HEIGHT
= 640.0;
const float SCORE_THRESHOLD
= 0.5;
const float NMS_THRESHOLD
= 0.45;
const float CONFIDENCE_THRESHOLD
= 0.45;const std
::vector
<std
::string
> class_name
= {
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush" };
void draw_label(cv
::Mat
& input_image
, std
::string label
, int left
, int top
)
{int baseLine
;cv
::Size label_size
= cv
::getTextSize(label
, 0.7, 0.7, 1, &baseLine
);top
= std
::max(top
, label_size
.height
);cv
::Point tlc
= cv
::Point(left
, top
);cv
::Point brc
= cv
::Point(left
, top
+ label_size
.height
+ baseLine
);cv
::putText(input_image
, label
, cv
::Point(left
, top
+ label_size
.height
), cv
::FONT_HERSHEY_SIMPLEX
, 0.7, cv
::Scalar(0, 255, 255), 1);
}
std
::vector
<cv
::Mat
> preprocess(cv
::Mat
& input_image
, cv
::dnn
::Net
& net
)
{cv
::Mat blob
;cv
::dnn
::blobFromImage(input_image
, blob
, 1. / 255., cv
::Size(INPUT_WIDTH
, INPUT_HEIGHT
), cv
::Scalar(), true, false);net
.setInput(blob
);std
::vector
<cv
::Mat
> preprcess_image
;net
.forward(preprcess_image
, net
.getUnconnectedOutLayersNames());return preprcess_image
;
}
std
::vector
<cv
::Rect
> postprocess(std
::vector
<cv
::Mat
>& preprcess_image
, cv
::Mat
& output_image
)
{std
::vector
<int> class_ids
;std
::vector
<float> confidences
;std
::vector
<cv
::Rect
> boxes
;std
::vector
<cv
::Rect
> boxes_nms
;float x_factor
= output_image
.cols
/ INPUT_WIDTH
;float y_factor
= output_image
.rows
/ INPUT_HEIGHT
;float* data
= (float*)preprcess_image
[0].data
;const int dimensions
= 85;const int rows
= 25200;for (int i
= 0; i
< rows
; ++i
){float confidence
= data
[4];if (confidence
>= CONFIDENCE_THRESHOLD
){float* classes_scores
= data
+ 5;cv
::Mat
scores(1, class_name
.size(), CV_32FC1
, classes_scores
);cv
::Point class_id
;double max_class_score
;cv
::minMaxLoc(scores
, 0, &max_class_score
, 0, &class_id
);if (max_class_score
> SCORE_THRESHOLD
){confidences
.push_back(confidence
);class_ids
.push_back(class_id
.x
);float cx
= data
[0];float cy
= data
[1];float w
= data
[2];float h
= data
[3];int left
= int((cx
- 0.5 * w
) * x_factor
);int top
= int((cy
- 0.5 * h
) * y_factor
);int width
= int(w
* x_factor
);int height
= int(h
* y_factor
);boxes
.push_back(cv
::Rect(left
, top
, width
, height
));}}data
+= 85;}std
::vector
<int> indices
;cv
::dnn
::NMSBoxes(boxes
, confidences
, SCORE_THRESHOLD
, NMS_THRESHOLD
, indices
);for (size_t i
= 0; i
< indices
.size(); i
++){int idx
= indices
[i
];cv
::Rect box
= boxes
[idx
];boxes_nms
.push_back(box
);int left
= box
.x
;int top
= box
.y
;int width
= box
.width
;int height
= box
.height
;cv
::rectangle(output_image
, cv
::Point(left
, top
), cv
::Point(left
+ width
, top
+ height
), cv
::Scalar(255, 0, 0), 1);std
::string label
= cv
::format("%.2f", confidences
[idx
]);label
= class_name
[class_ids
[idx
]] + ":" + label
;draw_label(output_image
, label
, left
, top
);}return boxes_nms
;
}std
::vector
<cv
::Point
> get_centers(std
::vector
<cv
::Rect
> detections
)
{std
::vector
<cv
::Point
> detections_centers(detections
.size());for (size_t i
= 0; i
< detections
.size(); i
++){detections_centers
[i
] = cv
::Point(detections
[i
].x
+ detections
[i
].width
/ 2, detections
[i
].y
+ detections
[i
].height
/ 2);}return detections_centers
;
}float get_distance(cv
::Point p1
, cv
::Point p2
)
{return sqrt(pow(p1
.x
- p2
.x
, 2) + pow(p1
.y
- p2
.y
, 2));
}bool is_cross(cv
::Point p1
, cv
::Point p2
)
{if (p1
.x
== p2
.x
) return false;int y
= 500; float k
= (p1
.y
- p2
.y
) / (p1
.x
- p2
.x
); float b
= p1
.y
- k
* p1
.x
; float x
= (y
- b
) / k
;return (x
> std
::min(p1
.x
, p2
.x
) && x
< std
::max(p1
.x
, p2
.x
));
}int main(int argc
, char** argv
)
{cv
::VideoCapture
capture("test.mp4");cv
::Mat frame
;cv
::dnn
::Net net
= cv
::dnn
::readNet("yolov5s-f32.onnx");int frame_num
= 0;int count
= 0;std
::vector
<cv
::Point
> detections_centers_old
;std
::vector
<cv
::Point
> detections_centers_new
;while(cv
::waitKey(1) < 0){capture
>> frame
;if (frame
.empty())break;std
::cout
<< "******************************************************************* frame_num: " << frame_num
<< std
::endl
;cv
::Mat image
= frame
.clone();std
::vector
<cv
::Mat
> preprcess_image
= preprocess(image
, net
);std
::vector
<cv
::Rect
> detections
= postprocess(preprcess_image
, image
);if (frame_num
== 0){detections_centers_old
= get_centers(detections
);std
::cout
<< "detections_center:" << std
::endl
;for (size_t i
= 0; i
< detections_centers_old
.size(); i
++){std
::cout
<< detections_centers_old
[i
] << std
::endl
;}}else if (frame_num
% 2 == 0){detections_centers_new
= get_centers(detections
);std
::cout
<< "detections_center:" << std
::endl
;for (size_t i
= 0; i
< detections_centers_new
.size(); i
++){std
::cout
<< detections_centers_new
[i
] << std
::endl
;}std
::vector
<std
::vector
<float>> distance_matrix(detections_centers_new
.size(), std
::vector<float>(detections_centers_old
.size()));std
::cout
<< "distance_matrix:" << std
::endl
;for (size_t i
= 0; i
< detections_centers_new
.size(); i
++){for (size_t j
= 0; j
< detections_centers_old
.size(); j
++){distance_matrix
[i
][j
] = get_distance(detections_centesr_new
[i
], detections_centers_old
[j
]); std
::cout
<< distance_matrix
[i
][j
] << " ";}std
::cout
<< std
::endl
;}std
::cout
<< "min_index:" << std
::endl
;std
::vector
<float> min_indices(detections_centers_new
.size());for (size_t i
= 0; i
< detections_centers_new
.size(); i
++){std
::vector
<float> distance_vector
= distance_matrix
[i
];int min_index
= std
::min_element(distance_vector
.begin(), distance_vector
.end()) - distance_vector
.begin();min_indices
[i
] = min_index
;std
::cout
<< min_index
<< " ";}std
::cout
<< std
::endl
;for (size_t i
= 0; i
< detections_centers_new
.size(); i
++){cv
::Point p1
= detections_centers_new
[i
];cv
::Point p2
= detections_centers_old
[min_indices
[i
]];std
::cout
<< p1
<< " " << p2
<< std
::endl
;if (is_cross(p1
, p2
)){std
::cout
<< "is_cross" << p1
<< " " << p2
<< std
::endl
;count
++;}}detections_centers_old
= detections_centers_new
;}frame_num
++;cv
::putText(image
, "car num: " + std
::to_string(count
), cv
::Point(20, 50), cv
::FONT_HERSHEY_SIMPLEX
, 0.7, cv
::Scalar(0, 255, 255), 1);cv
::line(image
, cv
::Point(0, 500), cv
::Point(1280, 500) , cv
::Scalar(0, 0, 255));cv
::imshow("output", image
);cv
::imwrite(std
::to_string(frame_num
) + ".jpg", image
);}capture
.release();return 0;
}
在調試中,發現v1的實現存在如下問題:出現新目標的時候,計算新舊檢測框的對應關系出現匹配錯誤,導致計數偏多。因此在v2中設置匹配的距離閾值,并簡化了判斷檢測框中心點連線和撞線是否相交的方法。
v2的代碼實現如下:
#include <iostream>
#include <opencv2/opencv.hpp>
const float INPUT_WIDTH
= 640.0;
const float INPUT_HEIGHT
= 640.0;
const float SCORE_THRESHOLD
= 0.5;
const float NMS_THRESHOLD
= 0.25;
const float CONFIDENCE_THRESHOLD
= 0.5;const std
::vector
<std
::string
> class_name
= {"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light","fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow","elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee","skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard","tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple","sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch","potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone","microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear","hair drier", "toothbrush" };const int IMAGE_WIDTH
= 1280;
const int IMAGE_HEIGHT
= 720;
const int LINE_HEIGHT
= IMAGE_HEIGHT
/ 2;
void draw_label(cv
::Mat
& input_image
, std
::string label
, int left
, int top
)
{int baseLine
;cv
::Size label_size
= cv
::getTextSize(label
, 0.7, 0.7, 1, &baseLine
);top
= std
::max(top
, label_size
.height
);cv
::Point tlc
= cv
::Point(left
, top
);cv
::Point brc
= cv
::Point(left
, top
+ label_size
.height
+ baseLine
);cv
::putText(input_image
, label
, cv
::Point(left
, top
+ label_size
.height
), cv
::FONT_HERSHEY_SIMPLEX
, 0.7, cv
::Scalar(0, 255, 255), 1);
}
std
::vector
<cv
::Mat
> preprocess(cv
::Mat
& input_image
, cv
::dnn
::Net
& net
)
{cv
::Mat blob
;cv
::dnn
::blobFromImage(input_image
, blob
, 1. / 255., cv
::Size(INPUT_WIDTH
, INPUT_HEIGHT
), cv
::Scalar(), true, false);net
.setInput(blob
);std
::vector
<cv
::Mat
> preprcess_image
;net
.forward(preprcess_image
, net
.getUnconnectedOutLayersNames());return preprcess_image
;
}
std
::vector
<cv
::Rect
> postprocess(std
::vector
<cv
::Mat
>& preprcess_image
, cv
::Mat
& output_image
)
{std
::vector
<int> class_ids
;std
::vector
<float> confidences
;std
::vector
<cv
::Rect
> boxes
;std
::vector
<cv
::Rect
> boxes_nms
;float x_factor
= output_image
.cols
/ INPUT_WIDTH
;float y_factor
= output_image
.rows
/ INPUT_HEIGHT
;float* data
= (float*)preprcess_image
[0].data
;const int dimensions
= 85;const int rows
= 25200;for (int i
= 0; i
< rows
; ++i
){float confidence
= data
[4];if (confidence
>= CONFIDENCE_THRESHOLD
){float* classes_scores
= data
+ 5;cv
::Mat
scores(1, class_name
.size(), CV_32FC1
, classes_scores
);cv
::Point class_id
;double max_class_score
;cv
::minMaxLoc(scores
, 0, &max_class_score
, 0, &class_id
);if (max_class_score
> SCORE_THRESHOLD
){confidences
.push_back(confidence
);class_ids
.push_back(class_id
.x
);float cx
= data
[0];float cy
= data
[1];float w
= data
[2];float h
= data
[3];int left
= int((cx
- 0.5 * w
) * x_factor
);int top
= int((cy
- 0.5 * h
) * y_factor
);int width
= int(w
* x_factor
);int height
= int(h
* y_factor
);boxes
.push_back(cv
::Rect(left
, top
, width
, height
));}}data
+= 85;}std
::vector
<int> indices
;cv
::dnn
::NMSBoxes(boxes
, confidences
, SCORE_THRESHOLD
, NMS_THRESHOLD
, indices
);for (size_t i
= 0; i
< indices
.size(); i
++){int idx
= indices
[i
];cv
::Rect box
= boxes
[idx
];boxes_nms
.push_back(box
);int left
= box
.x
;int top
= box
.y
;int width
= box
.width
;int height
= box
.height
;cv
::rectangle(output_image
, cv
::Point(left
, top
), cv
::Point(left
+ width
, top
+ height
), cv
::Scalar(255, 0, 0), 1);std
::string label
= cv
::format("%.2f", confidences
[idx
]);label
= "car";draw_label(output_image
, label
, left
, top
);}return boxes_nms
;
}
std
::vector
<cv
::Point
> get_centers(std
::vector
<cv
::Rect
> detections
)
{std
::vector
<cv
::Point
> detections_centers(detections
.size());for (size_t i
= 0; i
< detections
.size(); i
++){detections_centers
[i
] = cv
::Point(detections
[i
].x
+ detections
[i
].width
/ 2, detections
[i
].y
+ detections
[i
].height
/ 2);}return detections_centers
;
}
float get_distance(cv
::Point p1
, cv
::Point p2
)
{return sqrt(pow(p1
.x
- p2
.x
, 2) + pow(p1
.y
- p2
.y
, 2));
}
bool is_cross(cv
::Point p1
, cv
::Point p2
)
{return (p1
.y
<= LINE_HEIGHT
&& p2
.y
> LINE_HEIGHT
) || (p1
.y
> LINE_HEIGHT
&& p2
.y
<= LINE_HEIGHT
);
}int main(int argc
, char** argv
)
{cv
::VideoCapture
capture("test.mp4");cv
::Mat frame
;cv
::dnn
::Net net
= cv
::dnn
::readNet("yolov5s-f32.onnx");int frame_num
= 0;int count
= 0;std
::vector
<cv
::Point
> detections_centers_old
;std
::vector
<cv
::Point
> detections_centers_new
;while(cv
::waitKey(1) < 0){capture
>> frame
;if (frame
.empty())break;std
::cout
<< "******************************************************************* frame_num: " << frame_num
<< std
::endl
;cv
::Mat image
= frame
.clone();std
::vector
<cv
::Mat
> preprcess_image
= preprocess(image
, net
);std
::vector
<cv
::Rect
> detections
= postprocess(preprcess_image
, image
);if (frame_num
== 0){detections_centers_old
= get_centers(detections
);#ifdef DEBUGstd
::cout
<< "detections_center:" << std
::endl
;for (size_t i
= 0; i
< detections_centers_old
.size(); i
++){std
::cout
<< detections_centers_old
[i
] << std
::endl
;}
#endif }else if (frame_num
% 2 == 0){detections_centers_new
= get_centers(detections
);#ifdef DEBUGstd
::cout
<< "detections_center:" << std
::endl
;for (size_t i
= 0; i
< detections_centers_new
.size(); i
++){std
::cout
<< detections_centers_new
[i
] << std
::endl
;}
#endif std
::vector
<std
::vector
<float>> distance_matrix(detections_centers_new
.size(), std
::vector<float>(detections_centers_old
.size())); for (size_t i
= 0; i
< detections_centers_new
.size(); i
++){for (size_t j
= 0; j
< detections_centers_old
.size(); j
++){distance_matrix
[i
][j
] = get_distance(detections_centers_new
[i
], detections_centers_old
[j
]); }}#ifdef DEBUGstd
::cout
<< "min_index:" << std
::endl
;
#endif std
::vector
<float> min_indices(detections_centers_new
.size());for (size_t i
= 0; i
< detections_centers_new
.size(); i
++){std
::vector
<float> distance_vector
= distance_matrix
[i
];float min_val
= *std
::min_element(distance_vector
.begin(), distance_vector
.end());int min_index
= -1;if (min_val
< LINE_HEIGHT
/ 5)min_index
= std
::min_element(distance_vector
.begin(), distance_vector
.end()) - distance_vector
.begin();min_indices
[i
] = min_index
;
#ifdef DEBUGstd
::cout
<< min_index
<< " ";
#endif }std
::cout
<< std
::endl
;for (size_t i
= 0; i
< detections_centers_new
.size(); i
++){if (min_indices
[i
] < 0)continue;cv
::Point p1
= detections_centers_new
[i
];cv
::Point p2
= detections_centers_old
[min_indices
[i
]];#ifdef DEBUGstd
::cout
<< p1
<< " " << p2
<< std
::endl
;
#endif if (is_cross(p1
, p2
)){
#ifdef DEBUGstd
::cout
<< "is_cross" << p1
<< " " << p2
<< std
::endl
;
#endif count
++;}}detections_centers_old
= detections_centers_new
;}cv
::putText(image
, "car num: " + std
::to_string(count
), cv
::Point(20, 50), cv
::FONT_HERSHEY_SIMPLEX
, 0.7, cv
::Scalar(0, 0, 255), 1);cv
::line(image
, cv
::Point(0, LINE_HEIGHT
), cv
::Point(IMAGE_WIDTH
, LINE_HEIGHT
), cv
::Scalar(0, 0, 255));cv
::imshow("output", image
);#ifdef DEBUGif (frame_num
% 2 == 0)cv
::imwrite(std
::to_string(frame_num
) + ".jpg", image
);
#endif frame_num
++;}capture
.release();return 0;
}
檢測效果如下圖,效果還是可以的,比傳統方法有大幅提升。完整視頻中有一次計數異常(總共52輛向圖像上方行駛的車輛,檢出53輛),是因為檢測器不準導致車輛檢測框位置漂移,可以后續優化。注:由于官方提供的coco80類的開源權重文件用于車輛檢測效果不是很好,LZ把檢測出的類別直接固定為car,實際應自己重新訓練一個車輛檢測的模型。
更詳細注釋的代碼、測試視頻和轉好的權重文件放在下載鏈接:點擊跳轉
總結
以上是生活随笔為你收集整理的两百行C++代码实现yolov5车辆计数部署(通俗易懂版)的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。