#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <vector>
#include <map>
#include <string>
#include <algorithm>

// YoutuReID class for person re-identification
class YoutuReID {
public:
    YoutuReID(const std::string& model_path,
              const cv::Size& input_size = cv::Size(128, 256),
              int output_dim = 768,
              const cv::Scalar& mean = cv::Scalar(0.485, 0.456, 0.406),
              const cv::Scalar& std = cv::Scalar(0.229, 0.224, 0.225),
              int backend_id = 0,
              int target_id = 0)
        : model_path_(model_path), input_size_(input_size),
          output_dim_(output_dim), mean_(mean), std_(std),
          backend_id_(backend_id), target_id_(target_id)
    {
        model_ = cv::dnn::readNet(model_path_);
        model_.setPreferableBackend(backend_id_);
        model_.setPreferableTarget(target_id_);
    }

    void setBackendAndTarget(int backend_id, int target_id) {
        backend_id_ = backend_id;
        target_id_ = target_id;
        model_.setPreferableBackend(backend_id_);
        model_.setPreferableTarget(target_id_);
    }

    void setInputSize(const cv::Size& input_size) {
        input_size_ = input_size;
    }

    // Preprocess image by resizing, normalizing, and creating a blob
    cv::Mat preprocess(const cv::Mat& image) {
        cv::Mat img;
        cv::resize(image, img, input_size_);
        cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
        img.convertTo(img, CV_32F, 1.0 / 255.0);

        // Normalize each channel separately
        std::vector<cv::Mat> channels(3);
        cv::split(img, channels);
        channels[0] = (channels[0] - mean_[0]) / std_[0];
        channels[1] = (channels[1] - mean_[1]) / std_[1];
        channels[2] = (channels[2] - mean_[2]) / std_[2];
        cv::merge(channels, img);

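        // blobFromImage packs the float HWC image into the NCHW blob layout expected by cv::dnn.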
        return cv::dnn::blobFromImage(img);
    }

    // Run inference to extract feature vector
    cv::Mat infer(const cv::Mat& image) {
        cv::Mat input_blob = preprocess(image);
        model_.setInput(input_blob);
        cv::Mat features = model_.forward();

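        // If the network returns an N x C x 1 x 1 blob (e.g. from a global-pooling head),
        // flatten it to a 1 x C feature row.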
        if (features.dims == 4 && features.size[2] == 1 && features.size[3] == 1) {
            features = features.reshape(1, {1, features.size[1]});
        }

        return features;
    }

    // Perform a query, comparing each query image against every gallery image
    std::vector<std::vector<int>> query(const std::vector<cv::Mat>& query_img_list,
                                        const std::vector<cv::Mat>& gallery_img_list,
                                        int topK = 5) {
        if (query_img_list.empty() || gallery_img_list.empty()) {
            return {};
        }

        std::vector<cv::Mat> query_features_list, gallery_features_list;
        cv::Mat query_features, gallery_features;

        for (size_t i = 0; i < query_img_list.size(); ++i) {
            cv::Mat feature = infer(query_img_list[i]);
            query_features_list.push_back(feature.clone());
        }
        cv::vconcat(query_features_list, query_features);
        normalizeFeatures(query_features);

        for (size_t i = 0; i < gallery_img_list.size(); ++i) {
            cv::Mat feature = infer(gallery_img_list[i]);
            gallery_features_list.push_back(feature.clone());
        }
        cv::vconcat(gallery_features_list, gallery_features);
        normalizeFeatures(gallery_features);

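        // The rows are L2-normalized, so this matrix product is the cosine
        // similarity between every query/gallery pair (larger = more similar).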
        cv::Mat dist = query_features * gallery_features.t();
        return getTopK(dist, topK);
    }

private:
    // Normalize feature vectors row-wise to unit length
    void normalizeFeatures(cv::Mat& features) {
        const double epsilon = 1e-6;
        for (int i = 0; i < features.rows; ++i) {
            cv::Mat featureRow = features.row(i);
            double norm = cv::norm(featureRow, cv::NORM_L2);
            if (norm < epsilon) {
                norm = epsilon;
            }
            featureRow /= norm;
        }
    }

    // Retrieve Top-K indices from similarity matrix, one row per query
    std::vector<std::vector<int>> getTopK(const cv::Mat& dist, int topK) {
        std::vector<std::vector<int>> indices(dist.rows);

        for (int i = 0; i < dist.rows; ++i) {
            std::vector<std::pair<float, int>> sim_index_pairs;
            for (int j = 0; j < dist.cols; ++j) {
                sim_index_pairs.emplace_back(dist.at<float>(i, j), j);
            }
            std::sort(sim_index_pairs.begin(), sim_index_pairs.end(),
                      [](const std::pair<float, int>& a, const std::pair<float, int>& b) {
                          return a.first > b.first;
                      });

            for (int k = 0; k < topK && k < static_cast<int>(sim_index_pairs.size()); ++k) {
                indices[i].push_back(sim_index_pairs[k].second);
            }
        }
        return indices;
    }

    std::string model_path_;
    cv::Size input_size_;
    int output_dim_;
    cv::Scalar mean_, std_;
    int backend_id_;
    int target_id_;
    cv::dnn::Net model_;
};

// Read images from directory and return a pair of image list and file list
std::pair<std::vector<cv::Mat>, std::vector<std::string>> readImagesFromDirectory(const std::string& img_dir, int w = 128, int h = 256) {
    std::vector<cv::Mat> img_list;
    std::vector<std::string> file_list;

    std::vector<std::string> file_names;
    cv::glob(img_dir + "/*", file_names, false);

    for (size_t i = 0; i < file_names.size(); ++i) {
        std::string file_name = file_names[i].substr(file_names[i].find_last_of("/\\") + 1);
        cv::Mat img = cv::imread(file_names[i]);
        if (!img.empty()) {
            cv::resize(img, img, cv::Size(w, h));
            img_list.push_back(img);
            file_list.push_back(file_name);
        }
    }
    return std::make_pair(img_list, file_list);
}

// Visualize query and gallery results by creating concatenated images
std::map<std::string, cv::Mat> visualize(
    const std::map<std::string, std::vector<std::string>>& results,
    const std::string& query_dir,
    const std::string& gallery_dir,
    const cv::Size& output_size = cv::Size(128, 384)) {

    std::map<std::string, cv::Mat> results_vis;

    for (std::map<std::string, std::vector<std::string>>::const_iterator it = results.begin(); it != results.end(); ++it) {
        const std::string& query_file = it->first;
        const std::vector<std::string>& top_matches = it->second;

        cv::Mat query_img = cv::imread(query_dir + "/" + query_file);
        if (query_img.empty()) continue;

        cv::resize(query_img, query_img, output_size);
        cv::copyMakeBorder(query_img, query_img, 5, 5, 5, 5,
                           cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
        cv::putText(query_img, "Query", cv::Point(10, 30),
                    cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 0), 2);

        cv::Mat concat_img = query_img;

        for (size_t i = 0; i < top_matches.size(); ++i) {
            cv::Mat gallery_img = cv::imread(gallery_dir + "/" + top_matches[i]);
            if (gallery_img.empty()) continue;

            cv::resize(gallery_img, gallery_img, output_size);
            cv::copyMakeBorder(gallery_img, gallery_img, 5, 5, 5, 5,
                               cv::BORDER_CONSTANT, cv::Scalar(255, 255, 255));
            cv::putText(gallery_img, "G" + std::to_string(i), cv::Point(10, 30),
                        cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 0), 2);

            cv::hconcat(concat_img, gallery_img, concat_img);
        }
        results_vis[query_file] = concat_img;
    }
    return results_vis;
}

void printHelpMessage() {
    std::cout << "usage: demo.cpp [-h] [--query_dir QUERY_DIR] [--gallery_dir GALLERY_DIR] "
              << "[--backend_target BACKEND_TARGET] [--topk TOPK] [--model MODEL] [--save] [--vis]\n\n"
              << "ReID baseline models from Tencent Youtu Lab\n\n"
              << "optional arguments:\n"
              << "  -h, --help            show this help message and exit\n"
              << "  --query_dir QUERY_DIR, -q QUERY_DIR\n"
              << "                        Query directory.\n"
              << "  --gallery_dir GALLERY_DIR, -g GALLERY_DIR\n"
              << "                        Gallery directory.\n"
              << "  --backend_target BACKEND_TARGET, -bt BACKEND_TARGET\n"
              << "                        Choose one of the backend-target pairs to run this demo: "
                 "0: (default) OpenCV implementation + CPU, 1: CUDA + GPU (CUDA), 2: CUDA + GPU (CUDA FP16), "
                 "3: TIM-VX + NPU, 4: CANN + NPU\n"
              << "  --topk TOPK, -k TOPK  Top-K closest from gallery for each query.\n"
              << "  --model MODEL, -m MODEL\n"
              << "                        Path to the model.\n"
              << "  --save, -s            Specify to save the visualized results to files.\n"
              << "  --vis, -v             Specify to open a new window to show the visualized results.\n";
}

int main(int argc, char** argv) {
    // CommandLineParser setup
    cv::CommandLineParser parser(argc, argv,
        "{help h | | Show help message.}"
        "{query_dir q | | Query directory.}"
        "{gallery_dir g | | Gallery directory.}"
        "{backend_target bt | 0 | Choose one of the backend-target pairs to run this demo: 0: (default) OpenCV implementation + CPU, "
        "1: CUDA + GPU (CUDA), 2: CUDA + GPU (CUDA FP16), 3: TIM-VX + NPU, 4: CANN + NPU}"
        "{topk k | 10 | Top-K closest from gallery for each query.}"
        "{model m | person_reid_youtu_2021nov.onnx | Path to the model.}"
        "{save s | false | Specify to save the visualized results to files.}"
        "{vis v | false | Specify to open a new window to show the visualized results.}");

    if (parser.has("help")) {
        printHelpMessage();
        return 0;
    }

    std::string query_dir = parser.get<std::string>("query_dir");
    std::string gallery_dir = parser.get<std::string>("gallery_dir");
    int backend_target = parser.get<int>("backend_target");
    int topK = parser.get<int>("topk");
    std::string model_path = parser.get<std::string>("model");
    bool save_flag = parser.get<bool>("save");
    bool vis_flag = parser.get<bool>("vis");

    if (!parser.check()) {
        parser.printErrors();
        return 1;
    }

    if (query_dir.empty() || gallery_dir.empty()) {
        std::cerr << "Both --query_dir and --gallery_dir must be specified." << std::endl;
        printHelpMessage();
        return 1;
    }

    const std::vector<std::pair<int, int>> backend_target_pairs = {
        {cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_TARGET_CPU},
        {cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA},
        {cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA_FP16},
        {cv::dnn::DNN_BACKEND_TIMVX, cv::dnn::DNN_TARGET_NPU},
        {cv::dnn::DNN_BACKEND_CANN, cv::dnn::DNN_TARGET_NPU}
    };

    if (backend_target < 0 || backend_target >= static_cast<int>(backend_target_pairs.size())) {
        std::cerr << "Invalid backend-target pair index: " << backend_target << std::endl;
        return 1;
    }

    int backend_id = backend_target_pairs[backend_target].first;
    int target_id = backend_target_pairs[backend_target].second;

    YoutuReID reid(model_path, cv::Size(128, 256), 768,
                   cv::Scalar(0.485, 0.456, 0.406),
                   cv::Scalar(0.229, 0.224, 0.225),
                   backend_id, target_id);

    std::pair<std::vector<cv::Mat>, std::vector<std::string>> query_data = readImagesFromDirectory(query_dir);
    std::pair<std::vector<cv::Mat>, std::vector<std::string>> gallery_data = readImagesFromDirectory(gallery_dir);

    std::vector<std::vector<int>> indices = reid.query(query_data.first, gallery_data.first, topK);

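    // Map each query file name to the file names of its Top-K gallery matches,
    // and print them so results are visible even without --vis/--save.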
    std::map<std::string, std::vector<std::string>> results;
    for (size_t i = 0; i < query_data.second.size(); ++i) {
        std::vector<std::string> top_matches;
        for (int idx : indices[i]) {
            top_matches.push_back(gallery_data.second[idx]);
        }
        results[query_data.second[i]] = top_matches;
        std::cout << "Query: " << query_data.second[i] << "\n";
        std::cout << "\tTop-" << topK << " from gallery: ";
        for (size_t j = 0; j < top_matches.size(); ++j) {
            std::cout << top_matches[j] << " ";
        }
        std::cout << std::endl;
    }

    std::map<std::string, cv::Mat> results_vis = visualize(results, query_dir, gallery_dir);

    if (save_flag) {
        for (std::map<std::string, cv::Mat>::iterator it = results_vis.begin(); it != results_vis.end(); ++it) {
            std::string save_path = "result-" + it->first;
            cv::imwrite(save_path, it->second);
        }
    }

    if (vis_flag) {
        for (std::map<std::string, cv::Mat>::iterator it = results_vis.begin(); it != results_vis.end(); ++it) {
            cv::namedWindow("result-" + it->first, cv::WINDOW_AUTOSIZE);
            cv::imshow("result-" + it->first, it->second);
            cv::waitKey(0);
            cv::destroyAllWindows();
        }
    }

    return 0;
}
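
// Example invocation (a sketch: the executable name "demo" and the directory layout are
// assumptions, not defined by this file; OpenCV's CommandLineParser expects key=value syntax):
//   ./demo -q=./query -g=./gallery -m=person_reid_youtu_2021nov.onnx -k=5 --vis --save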