#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <vector>
#include <map>
#include <string>
#include <algorithm>

// YoutuReID class for person re-identification
class YoutuReID {
public:
    YoutuReID(const std::string& model_path,
              const cv::Size& input_size = cv::Size(128, 256),
              int output_dim = 768,
              const cv::Scalar& mean = cv::Scalar(0.485, 0.456, 0.406),
              const cv::Scalar& std = cv::Scalar(0.229, 0.224, 0.225),
              int backend_id = 0,
              int target_id = 0)
        : model_path_(model_path), input_size_(input_size),
          output_dim_(output_dim), mean_(mean), std_(std),
          backend_id_(backend_id), target_id_(target_id)
    {
        model_ = cv::dnn::readNet(model_path_);
        model_.setPreferableBackend(backend_id_);
        model_.setPreferableTarget(target_id_);
    }

    void setBackendAndTarget(int backend_id, int target_id) {
        backend_id_ = backend_id;
        target_id_ = target_id;
        model_.setPreferableBackend(backend_id_);
        model_.setPreferableTarget(target_id_);
    }

    void setInputSize(const cv::Size& input_size) {
        input_size_ = input_size;
    }

    // Preprocess image by resizing, normalizing, and creating a blob
    cv::Mat preprocess(const cv::Mat& image) {
        cv::Mat img;
        cv::resize(image, img, input_size_);
        cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
        img.convertTo(img, CV_32F, 1.0 / 255.0);

        // Normalize each channel separately
        std::vector<cv::Mat> channels(3);
        cv::split(img, channels);
        channels[0] = (channels[0] - mean_[0]) / std_[0];
        channels[1] = (channels[1] - mean_[1]) / std_[1];
        channels[2] = (channels[2] - mean_[2]) / std_[2];
        cv::merge(channels, img);

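        // blobFromImage packs the float HWC image into the NCHW blob layout expected by cv::dnn.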
        return cv::dnn::blobFromImage(img);
    }

    // Run inference to extract feature vector
    cv::Mat infer(const cv::Mat& image) {
        cv::Mat input_blob = preprocess(image);
        model_.setInput(input_blob);
        cv::Mat features = model_.forward();

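        // If the network returns an N x C x 1 x 1 blob (e.g. from a global-pooling head),
        // flatten it to a 1 x C feature row.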
        if (features.dims == 4 && features.size[2] == 1 && features.size[3] == 1) {
            features = features.reshape(1, {1, features.size[1]});
        }

        return features;
    }

    // Perform a query, comparing each query image against every gallery image
    std::vector<std::vector<int>> query(const std::vector<cv::Mat>& query_img_list,
                                        const std::vector<cv::Mat>& gallery_img_list,
                                        int topK = 5) {
        if (query_img_list.empty() || gallery_img_list.empty()) {
            return {};
        }

        std::vector<cv::Mat> query_features_list, gallery_features_list;
        cv::Mat query_features, gallery_features;

        for (size_t i = 0; i < query_img_list.size(); ++i) {
            cv::Mat feature = infer(query_img_list[i]);
            query_features_list.push_back(feature.clone());
        }
        cv::vconcat(query_features_list, query_features);
        normalizeFeatures(query_features);

        for (size_t i = 0; i < gallery_img_list.size(); ++i) {
            cv::Mat feature = infer(gallery_img_list[i]);
            gallery_features_list.push_back(feature.clone());
        }
        cv::vconcat(gallery_features_list, gallery_features);
        normalizeFeatures(gallery_features);

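        // The rows are L2-normalized, so this matrix product is the cosine
        // similarity between every query/gallery pair (larger = more similar).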
        cv::Mat dist = query_features * gallery_features.t();
        return getTopK(dist, topK);
    }

private:
    // Normalize feature vectors row-wise to unit length
    void normalizeFeatures(cv::Mat& features) {
        const double epsilon = 1e-6;
        for (int i = 0; i < features.rows; ++i) {
            cv::Mat featureRow = features.row(i);
            double norm = cv::norm(featureRow, cv::NORM_L2);
            if (norm < epsilon) {
                norm = epsilon;
            }
            featureRow /= norm;
        }
    }

    // Retrieve Top-K indices from similarity matrix, one row per query
    std::vector<std::vector<int>> getTopK(const cv::Mat& dist, int topK) {
        std::vector<std::vector<int>> indices(dist.rows);

        for (int i = 0; i < dist.rows; ++i) {
            std::vector<std::pair<float, int>> sim_index_pairs;
            for (int j = 0; j < dist.cols; ++j) {
                sim_index_pairs.emplace_back(dist.at<float>(i, j), j);
            }
            std::sort(sim_index_pairs.begin(), sim_index_pairs.end(),
                      [](const std::pair<float, int>& a, const std::pair<float, int>& b) {
                          return a.first > b.first;
                      });

            for (int k = 0; k < topK && k < static_cast<int>(sim_index_pairs.size()); ++k) {
                indices[i].push_back(sim_index_pairs[k].second);
            }
        }
        return indices;
    }

    std::string model_path_;
    cv::Size input_size_;
    int output_dim_;
    cv::Scalar mean_, std_;
    int backend_id_;
    int target_id_;
    cv::dnn::Net model_;
};

// Read images from directory and return a pair of image list and file list
std::pair<std::vector<cv::Mat>, std::vector<std::string>> readImagesFromDirectory(const std::string& img_dir, int w = 128, int h = 256) {
    std::vector<cv::Mat> img_list;
    std::vector<std::string> file_list;

    std::vector<std::string> file_names;
    cv::glob(img_dir + "/*", file_names, false);

    for (size_t i = 0; i < file_names.size(); ++i) {
        std::string file_name = file_names[i].substr(file_names[i].find_last_of("/\\") + 1);
        cv::Mat img = cv::imread(file_names[i]);
        if (!img.empty()) {
            cv::resize(img, img, cv::Size(w, h));
            img_list.push_back(img);
            file_list.push_back(file_name);
        }
    }
    return std::make_pair(img_list, file_list);
}

// Visualize query and gallery results by creating concatenated images
std::map<std::string, cv::Mat> visualize(
    const std::map<std::string, std::vector<std::string>>& results,
    const std::string& query_dir,
    const std::string& gallery_dir,
    const cv::Size& output_size = cv::Size(128, 384)) {

    std::map<std::string, cv::Mat> results_vis;

    for (std::map<std::string, std::vector<std::string>>::const_iterator it = results.begin(); it != results.end(); ++it) {
        const std::string& query_file = it->first;
        const std::vector<std::string>& top_matches = it->second;

        cv::Mat query_img = cv::imread(query_dir + "/" + query_file);
        if (query_img.empty()) continue;

        cv::resize(query_img, query_img, output_size);
        cv::copyMakeBorder(query_img, query_img, 5, 5, 5, 5,
                           cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
        cv::putText(query_img, "Query", cv::Point(10, 30),
                    cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 0), 2);

        cv::Mat concat_img = query_img;

        for (size_t i = 0; i < top_matches.size(); ++i) {
            cv::Mat gallery_img = cv::imread(gallery_dir + "/" + top_matches[i]);
            if (gallery_img.empty()) continue;

            cv::resize(gallery_img, gallery_img, output_size);
            cv::copyMakeBorder(gallery_img, gallery_img, 5, 5, 5, 5,
                               cv::BORDER_CONSTANT, cv::Scalar(255, 255, 255));
            cv::putText(gallery_img, "G" + std::to_string(i), cv::Point(10, 30),
                        cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 0), 2);

            cv::hconcat(concat_img, gallery_img, concat_img);
        }
        results_vis[query_file] = concat_img;
    }
    return results_vis;
}

void printHelpMessage() {
    std::cout << "usage: demo.cpp [-h] [--query_dir QUERY_DIR] [--gallery_dir GALLERY_DIR] "
              << "[--backend_target BACKEND_TARGET] [--topk TOPK] [--model MODEL] [--save] [--vis]\n\n"
              << "ReID baseline models from Tencent Youtu Lab\n\n"
              << "optional arguments:\n"
              << "  -h, --help            show this help message and exit\n"
              << "  --query_dir QUERY_DIR, -q QUERY_DIR\n"
              << "                        Query directory.\n"
              << "  --gallery_dir GALLERY_DIR, -g GALLERY_DIR\n"
              << "                        Gallery directory.\n"
              << "  --backend_target BACKEND_TARGET, -bt BACKEND_TARGET\n"
              << "                        Choose one of the backend-target pairs to run this demo: "
                 "0: (default) OpenCV implementation + CPU, 1: CUDA + GPU (CUDA), 2: CUDA + GPU (CUDA FP16), "
                 "3: TIM-VX + NPU, 4: CANN + NPU\n"
              << "  --topk TOPK, -k TOPK  Top-K closest from gallery for each query.\n"
              << "  --model MODEL, -m MODEL\n"
              << "                        Path to the model.\n"
              << "  --save, -s            Specify to save the visualized results to files.\n"
              << "  --vis, -v             Specify to open a new window to show the visualized results.\n";
}

int main(int argc, char** argv) {
    // CommandLineParser setup
    cv::CommandLineParser parser(argc, argv,
        "{help h | | Show help message.}"
        "{query_dir q | | Query directory.}"
        "{gallery_dir g | | Gallery directory.}"
        "{backend_target bt | 0 | Choose one of the backend-target pairs to run this demo: 0: (default) OpenCV implementation + CPU, "
        "1: CUDA + GPU (CUDA), 2: CUDA + GPU (CUDA FP16), 3: TIM-VX + NPU, 4: CANN + NPU}"
        "{topk k | 10 | Top-K closest from gallery for each query.}"
        "{model m | person_reid_youtu_2021nov.onnx | Path to the model.}"
        "{save s | false | Specify to save the visualized results to files.}"
        "{vis v | false | Specify to open a new window to show the visualized results.}");

    if (parser.has("help")) {
        printHelpMessage();
        return 0;
    }

    std::string query_dir = parser.get<std::string>("query_dir");
    std::string gallery_dir = parser.get<std::string>("gallery_dir");
    int backend_target = parser.get<int>("backend_target");
    int topK = parser.get<int>("topk");
    std::string model_path = parser.get<std::string>("model");
    bool save_flag = parser.get<bool>("save");
    bool vis_flag = parser.get<bool>("vis");

    if (!parser.check()) {
        parser.printErrors();
        return 1;
    }

    if (query_dir.empty() || gallery_dir.empty()) {
        std::cerr << "Both --query_dir and --gallery_dir must be specified." << std::endl;
        printHelpMessage();
        return 1;
    }

    const std::vector<std::pair<int, int>> backend_target_pairs = {
        {cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_TARGET_CPU},
        {cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA},
        {cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA_FP16},
        {cv::dnn::DNN_BACKEND_TIMVX, cv::dnn::DNN_TARGET_NPU},
        {cv::dnn::DNN_BACKEND_CANN, cv::dnn::DNN_TARGET_NPU}
    };

    if (backend_target < 0 || backend_target >= static_cast<int>(backend_target_pairs.size())) {
        std::cerr << "Invalid backend-target pair index: " << backend_target << std::endl;
        return 1;
    }

    int backend_id = backend_target_pairs[backend_target].first;
    int target_id = backend_target_pairs[backend_target].second;

    YoutuReID reid(model_path, cv::Size(128, 256), 768,
                   cv::Scalar(0.485, 0.456, 0.406),
                   cv::Scalar(0.229, 0.224, 0.225),
                   backend_id, target_id);

    std::pair<std::vector<cv::Mat>, std::vector<std::string>> query_data = readImagesFromDirectory(query_dir);
    std::pair<std::vector<cv::Mat>, std::vector<std::string>> gallery_data = readImagesFromDirectory(gallery_dir);

    std::vector<std::vector<int>> indices = reid.query(query_data.first, gallery_data.first, topK);

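    // Map each query file name to the file names of its Top-K gallery matches,
    // and print them so results are visible even without --vis/--save.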
    std::map<std::string, std::vector<std::string>> results;
    for (size_t i = 0; i < query_data.second.size(); ++i) {
        std::vector<std::string> top_matches;
        for (int idx : indices[i]) {
            top_matches.push_back(gallery_data.second[idx]);
        }
        results[query_data.second[i]] = top_matches;
        std::cout << "Query: " << query_data.second[i] << "\n";
        std::cout << "\tTop-" << topK << " from gallery: ";
        for (size_t j = 0; j < top_matches.size(); ++j) {
            std::cout << top_matches[j] << " ";
        }
        std::cout << std::endl;
    }

    std::map<std::string, cv::Mat> results_vis = visualize(results, query_dir, gallery_dir);

    if (save_flag) {
        for (std::map<std::string, cv::Mat>::iterator it = results_vis.begin(); it != results_vis.end(); ++it) {
            std::string save_path = "result-" + it->first;
            cv::imwrite(save_path, it->second);
        }
    }

    if (vis_flag) {
        for (std::map<std::string, cv::Mat>::iterator it = results_vis.begin(); it != results_vis.end(); ++it) {
            cv::namedWindow("result-" + it->first, cv::WINDOW_AUTOSIZE);
            cv::imshow("result-" + it->first, it->second);
            cv::waitKey(0);
            cv::destroyAllWindows();
        }
    }

    return 0;
}
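
// Example invocation (a sketch: the executable name "demo" and the directory layout are
// assumptions, not defined by this file; OpenCV's CommandLineParser expects key=value syntax):
//   ./demo -q=./query -g=./gallery -m=person_reid_youtu_2021nov.onnx -k=5 --vis --save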