// OpenModelZoo / SiamFC

 
			
			   
				 
					
						
						
							
							/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <dirent.h>
#include <gflags/gflags.h>
#include <opencv2/imgproc/types_c.h>
#include <sys/time.h>

#include <sstream>
#include <string>
#include <vector>
#include <algorithm>
#include <cmath>
#include <fstream>
#include <iosfwd>
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include <opencv2/opencv.hpp>

#include "inc/utils.h"
#include "include/api/context.h"
#include "include/api/model.h"
#include "include/api/serialization.h"
#include "include/api/types.h"
#include "include/dataset/execute.h"
#include "include/dataset/transforms.h"
#include "include/dataset/vision.h"
#include "include/dataset/vision_ascend.h"

using mindspore::Context;
using mindspore::DataType;
using mindspore::Graph;
using mindspore::GraphCell;
using mindspore::kSuccess;
using mindspore::Model;
using mindspore::ModelType;
using mindspore::MSTensor;
using mindspore::Serialization;
using mindspore::Status;
using mindspore::dataset::Execute;
using mindspore::dataset::TensorTransform;
using mindspore::dataset::transforms::TypeCast;
using mindspore::dataset::vision::Decode;
using mindspore::dataset::vision::HWC2CHW;
using mindspore::dataset::vision::Normalize;
using mindspore::dataset::vision::Resize;

// Command-line flags. Each DEFINE_* below creates a FLAGS_<name> variable that
// gflags::ParseCommandLineFlags() in main() fills in from argv.

// MindIR files loaded by Build(): model1 is run once per video on the first
// frame (getExemplar), model2 is run on every following frame (getRetInstance).
DEFINE_string(model_path1, "/home/siamfc/model1.mindir", "model path");
DEFINE_string(model_path2, "/home/siamfc/model2_change.mindir", "model path");
// Device index handed to Ascend310DeviceInfo::SetDeviceID() in Build().
DEFINE_int32(device_id, 0, "device id");
// NOTE(review): the next four flags are declared but not referenced anywhere in
// the visible part of this file.
DEFINE_string(precision_mode, "allow_fp32_to_fp16", "precision mode");
DEFINE_string(op_select_impl_mode, "", "op select impl mode");
DEFINE_string(aipp_path, "./aipp.cfg", "aipp path");
DEFINE_string(device_target, "Ascend310", "device target");
// Root under which getPath() places the result files
// (<code_path>/results/OTB2013/SiamFC/...).
DEFINE_string(code_path, "/home/Siamfc/", "code path");
// Dataset root; getPath() expects <seq_root_path>/<video>/groundtruth_rect*.txt.
DEFINE_string(seq_root_path, "/home/siamfc/OTB2013/", "OTB route");
// Sequence folder names under FLAGS_seq_root_path, processed by main() in this
// order. Every name is listed exactly once (a duplicated entry would be tracked
// twice and counted twice in the timing average). "Jogging" is the only
// sequence main() runs twice, because getPath() selects one of two numbered
// ground-truth files for it.
std::vector<std::string> all_videos = {
    "Basketball",   "Bolt",      "Boy",       "Car4",         "CarDark",
    "CarScale",     "Coke",      "Couple",    "Crossing",     "David",
    "David2",       "David3",    "Deer",      "Dog1",         "Doll",
    "Dudek",        "FaceOcc1",  "FaceOcc2",  "Fish",         "FleetFace",
    "Football",     "Football1", "Freeman1",  "Freeman3",     "Freeman4",
    "Girl",         "Ironman",   "Jogging",   "Jumping",      "Lemming",
    "Liquor",       "Matrix",    "Mhyang",    "MotorRolling", "MountainBike",
    "Shaking",      "Singer1",   "Singer2",   "Skating1",     "Skiing",
    "Soccer",       "Subway",    "Suv",       "Sylvester",    "Tiger1",
    "Tiger2",       "Trellis",   "Walking",   "Walking2",     "Woman"};

// Per-video tracker state shared by the helper functions in this file: input
// and output file locations, the current target box and the search-window
// scale parameters. All scalar members start at zero so that a freshly
// constructed param never exposes indeterminate values.
struct param {
  // Address of a single constant 1 with static storage duration.
  static const int* UnitValue() {
    static const int kOne = 1;
    return &kOne;
  }

  // Data fed to the second input tensor of model1 in getExemplar(). It points
  // at static storage, so nothing is heap-allocated per instance (the previous
  // `new int(1)` was never freed) and copies of a param remain valid.
  const int* one = UnitValue();
  size_t s_one = sizeof(int);  // byte length of *one
  size_t size_s = 0;           // number of frames, i.e. all_files.size()
  // Initial box from the ground-truth file: top-left corner shifted by -1,
  // plus width and height (see init_position()).
  double init_x = 0;
  double init_y = 0;
  double init_w = 0;
  double init_h = 0;
  double target_position[2] = {0, 0};  // current target centre (x, y)
  double target_sz[2] = {0, 0};        // current target size (w, h)
  // Context-padded exemplar extent: w/h + 0.5 * (w + h), and its geometric mean.
  double wc_z = 0;
  double hc_z = 0;
  double s_z = 0;
  double scale_z = 0;  // 127 / s_z
  // Per-scale factor applied to the response maximum before picking a scale.
  double penalty[3] = {0.9745, 1, 0.9745};
  // Scale factors applied to s_x when cropping the three search images.
  double scales[3] = {0.96385542, 1.00, 1.0375};
  std::string dataset_path_txt;  // ground-truth file read by Getpos()
  std::string record_name;       // output file for the tracked boxes
  std::string record_times;      // output file for the per-frame timings
  double s_x = 0;      // current search-window size
  double min_s_x = 0;  // lower clamp for s_x (0.2 * initial s_x)
  double max_s_x = 0;  // upper clamp for s_x (5 * initial s_x)
  double size_x_scales[3] = {0, 0, 0};  // s_x * scales[k], see getSizeScales()
  std::vector<double> box;              // current box as x, y, w, h
  std::vector<std::string> all_files;   // frame image paths of the video
};

// Repack a 3-channel image into planar order: the three channel planes are
// flattened one after another into a single float buffer, which is then
// returned as a 3-channel matrix with `resize_detection` rows.
// The planes are converted to std::vector<float>; callers in this file pass a
// CV_32FC3 image.
cv::Mat hwc2chw(Mat dst, size_t resize_detection) {
  std::vector<cv::Mat> planes(3);
  cv::split(dst, planes);

  std::vector<float> planar;
  for (const cv::Mat &plane : planes) {
    // reshape(1, 1) views the plane as a single row so it can be copied out as
    // a flat vector.
    const std::vector<float> flat = std::vector<float>(plane.reshape(1, 1));
    planar.insert(planar.end(), flat.begin(), flat.end());
  }

  // copyData = true: the matrix owns its own copy of the buffer, so it stays
  // valid after `planar` goes out of scope.
  cv::Mat packed = cv::Mat(planar, true);
  return packed.reshape(3, resize_detection);
}

// Crop a patch around the current target centre and store it in *target as a
// planar float image.
//   src    - source frame
//   target - receives the result of hwc2chw()
//   config - supplies the crop centre (target_position)
//   size   - forwarded to crop_and_pad() and used as the row count of *target
//   s_x    - forwarded to crop_and_pad() (callers pass the crop window size)
void pretreatment(const cv::Mat &src, cv::Mat *target, const param &config, const int &size,
                  const double &s_x) {
  const double center_x = config.target_position[0];
  const double center_y = config.target_position[1];
  cv::Mat patch = crop_and_pad(src, center_x, center_y, size, s_x);

  cv::Mat patch_f32;
  patch.convertTo(patch_f32, CV_32FC3);
  *target = hwc2chw(patch_f32, size);
}

// Load the frame list and the initial ground-truth box of `temp_video` and
// derive the initial tracker geometry from it.
// config->dataset_path_txt must already be set (getPath() does this).
// The ground-truth box is read with at(): a file that yields fewer than four
// values raises std::out_of_range instead of reading past the vector.
void init_position(param *config, const std::string& temp_video) {
  config->all_files = GetAllFiles(FLAGS_seq_root_path, temp_video);
  config->box = Getpos(config->dataset_path_txt);
  config->size_s = config->all_files.size();

  // Box is x, y, w, h; x and y are shifted by -1.
  config->init_x = config->box.at(0) - 1;
  config->init_y = config->box.at(1) - 1;
  config->init_w = config->box.at(2);
  config->init_h = config->box.at(3);
  const double w = config->init_w;
  const double h = config->init_h;

  // Target centre and size.
  config->target_position[0] = config->init_x + (w - 1) / 2;
  config->target_position[1] = config->init_y + (h - 1) / 2;
  config->target_sz[0] = w;
  config->target_sz[1] = h;

  // Exemplar extent padded with half of (w + h) of context on each axis.
  config->wc_z = w + 0.5 * (w + h);
  config->hc_z = h + 0.5 * (w + h);
  config->s_z = sqrt(config->wc_z * config->hc_z);
  config->scale_z = 127 / config->s_z;

  // Search-window size and the limits getRetInstance() clamps it to.
  config->s_x = config->s_z + (255 - 127) / config->scale_z;
  config->min_s_x = 0.2 * config->s_x;
  config->max_s_x = 5 * config->s_x;
}

// Fill in the ground-truth path and the two output paths for `temp_video`.
// For "Jogging" the ground-truth file and the result file carry the extra
// ".<jogging_count>" component; the timing file name is the same for every
// video, including both "Jogging" runs.
void getPath(param *config, const std::string& temp_video, int jogging_count) {
  const std::string sequence_dir = FLAGS_seq_root_path + "/" + temp_video + "/";

  config->record_times = FLAGS_code_path + "/results/OTB2013/SiamFC/times/" +
                         temp_video + "_time.txt";

  if (temp_video != "Jogging") {
    config->dataset_path_txt = sequence_dir + "groundtruth_rect.txt";
    config->record_name =
        FLAGS_code_path + "/results/OTB2013/SiamFC/" + temp_video + ".txt";
    return;
  }

  const std::string index = std::to_string(jogging_count);
  config->dataset_path_txt =
      sequence_dir + "groundtruth_rect" + "." + index + ".txt";
  config->record_name = FLAGS_code_path + "/results/OTB2013/SiamFC/" +
                        temp_video + "." + index + ".txt";
}

// Recompute the three search-window sizes: size_x_scales[k] = s_x * scales[k].
void getSizeScales(param *config) {
  const double search_size = config->s_x;
  std::transform(std::begin(config->scales), std::end(config->scales),
                 std::begin(config->size_x_scales),
                 [search_size](double factor) { return search_size * factor; });
}

void getExemplar(const std::string& temp_video, std::vector<MSTensor> *outputs_exemplar,
                 std::vector<MSTensor> *inputs_exemplar, Model *model1,
                 param *config, int jogging_count) {
  getPath(config, temp_video, jogging_count);
  std::vector<MSTensor> model_inputs = model1->GetInputs();
  init_position(config, temp_video);
  cv::Mat src = cv::imread(config->all_files[0], cv::IMREAD_COLOR);
  cv::Mat exemplar;
  pretreatment(src, &exemplar, config, 127, config->s_z);
  cout << "box :" << config->box[0] << " " << config->box[1] << " "
       << config->box[2] << " " << config->box[3] << endl;
  size_t size_buffer = exemplar.size().width * exemplar.size().height * 4 * 3;
  mindspore::MSTensor image("x", mindspore::DataType::kNumberTypeFloat32,
                            {static_cast<int64_t>(3), static_cast<int64_t>(127),
                             static_cast<int64_t>(127)},
                            exemplar.data, size_buffer);
  inputs_exemplar->clear();
  inputs_exemplar->emplace_back(
      model_inputs[0].Name(), model_inputs[0].DataType(),
      model_inputs[0].Shape(), image.Data().get(), image.DataSize());
  inputs_exemplar->emplace_back(
      model_inputs[1].Name(), model_inputs[1].DataType(),
      model_inputs[1].Shape(), config->one, config->s_one);
  Status ret_instance;
  ret_instance = model1->Predict(inputs_exemplar, outputs_exemplar);  // get exemplar img
  if (ret_instance != kSuccess) {
    cout << " Failed predict" << endl;
  } else {
    cout << " Success predict" << endl;
  }
}

void preInstance(std::vector<MSTensor> *input_exemplar,
                 const std::vector<MSTensor>& outputs_exemplar,
                 std::vector<MSTensor> *output_exemplar,
                 const std::vector<MSTensor>& model_inputs_instance, Model *model2,
                 const MSTensor& instance) {
  input_exemplar->clear();
  input_exemplar->emplace_back(
      model_inputs_instance[0].Name(), model_inputs_instance[0].DataType(),
      model_inputs_instance[0].Shape(), outputs_exemplar[0].Data().get(),
      outputs_exemplar[0].DataSize());
  input_exemplar->emplace_back(model_inputs_instance[1].Name(), model_inputs_instance[1].DataType(),
                               model_inputs_instance[1].Shape(), instance.Data().get(), instance.DataSize());
  model2->Predict(input_exemplar, output_exemplar);
}

void getRetInstance(int instance_num, const std::vector<MSTensor>& outputs_exemplar, const Mat &cos_window,
                    param *config, Model *model2) {
  getSizeScales(config);
  std::vector<MSTensor> model_inputs_instance = model2->GetInputs();
  cv::Mat instance_src;
  instance_src = cv::imread(config->all_files[instance_num], cv::IMREAD_COLOR);
  cv::Mat exemplar_img[3];
  cv::Mat inputs_instance[3];
  cv::Mat response_mapInit[3];
  cv::Mat response_map[3];
  double response_map_max[3];
  std::vector<MSTensor> input_exemplar;
  std::vector<MSTensor> output_exemplar1;
  std::vector<MSTensor> output_exemplar2;
  std::vector<MSTensor> output_exemplar3;
  for (int n = 0; n < 3; n++) {
    pretreatment(instance_src, &exemplar_img[n], config, 255, config->size_x_scales[n]);
  }
  size_t size_buffer_instance = exemplar_img[0].size().width * exemplar_img[0].size().height * 3 * 4;
  mindspore::MSTensor instance1(
      "y", mindspore::DataType::kNumberTypeFloat32,
      {static_cast<int64_t>(3), static_cast<int64_t>(255),
       static_cast<int64_t>(255)},
      exemplar_img[0].data, size_buffer_instance);
  mindspore::MSTensor instance2(
      "y", mindspore::DataType::kNumberTypeFloat32,
      {static_cast<int64_t>(3), static_cast<int64_t>(255),
       static_cast<int64_t>(255)},
      exemplar_img[1].data, size_buffer_instance);
  mindspore::MSTensor instance3(
      "y", mindspore::DataType::kNumberTypeFloat32,
      {static_cast<int64_t>(3), static_cast<int64_t>(255),
       static_cast<int64_t>(255)},
      exemplar_img[2].data, size_buffer_instance);

  preInstance(&input_exemplar, outputs_exemplar, &output_exemplar1, model_inputs_instance, model2, instance1);
  preInstance(&input_exemplar, outputs_exemplar, &output_exemplar2, model_inputs_instance, model2, instance2);
  preInstance(&input_exemplar, outputs_exemplar, &output_exemplar3, model_inputs_instance, model2, instance3);
  response_mapInit[0] = cv::Mat(17, 17, CV_32FC1, output_exemplar1[0].MutableData());
  response_mapInit[1] = cv::Mat(17, 17, CV_32FC1, output_exemplar2[0].MutableData());
  response_mapInit[2] = cv::Mat(17, 17, CV_32FC1, output_exemplar3[0].MutableData());

  double minValue = 0;
  double maxValue = 0;
  for (int n = 0; n < 3; n++) {
    cv::resize(response_mapInit[n], response_map[n], Size(272, 272), 0, 0,
               cv::INTER_CUBIC);
    cv::minMaxIdx(response_map[n], &minValue, &maxValue, NULL, NULL);
    response_map_max[n] = maxValue * config->penalty[n];
  }
  int scale_index = std::max_element(response_map_max, response_map_max + 3) - response_map_max;
  cv::Mat response_map_up = response_map[scale_index];
  double minValue_response = 0;
  double maxValue_response = 0;
  cv::minMaxIdx(response_map_up, &minValue_response, &maxValue_response);
  response_map_up = response_map_up - minValue_response;
  Scalar sum_response = sum(response_map_up);
  response_map_up = response_map_up / sum_response[0];
  response_map_up = (1 - 0.176) * response_map_up + 0.176 * cos_window;
  cv::minMaxIdx(response_map_up, &minValue_response, &maxValue_response);

  cv::Point maxLoc;
  cv::minMaxLoc(response_map_up, NULL, NULL, NULL, &maxLoc);
  double maxLoc_x = static_cast<double>(maxLoc.x);
  double maxLoc_y = static_cast<double>(maxLoc.y);
  maxLoc_x -= (271 / 2);
  maxLoc_y -= (271 / 2);
  maxLoc_x /= 2;
  maxLoc_y /= 2;

  double scale = config->scales[scale_index];
  maxLoc_x = maxLoc_x * (config->s_x * scale) / 255;
  maxLoc_y = maxLoc_y * (config->s_x * scale) / 255;
  config->target_position[0] += maxLoc_x;
  config->target_position[1] += maxLoc_y;
  cout << " target_position[0]: " << config->target_position[0]
       << " target_position[1]:" << config->target_position[1] << endl;
  config->s_x = (0.41 + 0.59 * scale) * config->s_x;
  config->s_x = max(config->min_s_x, min(config->max_s_x, config->s_x));
  config->target_sz[0] = (0.41 + 0.59 * scale) * config->target_sz[0];
  config->target_sz[1] = (0.41 + 0.59 * scale) * config->target_sz[1];
  config->box[0] = config->target_position[0] + 1 - (config->target_sz[0]) / 2;
  config->box[1] = config->target_position[1] + 1 - (config->target_sz[1]) / 2;
  config->box[2] = config->target_sz[0];
  config->box[3] = config->target_sz[1];
}

// Fill the float matrix `dst` (rows x cols, already allocated) with Hann
// window weights.
//  - 1x1: the single element is set to 1.
//  - one row or one column: plain 1-D weights 0.5 * (1 - cos(2*pi*k / (n - 1))).
//  - otherwise: product of row and column weight, followed by an element-wise
//    square root over the whole matrix.
void CreateHanningWindowWithCV_32F(Mat dst, const int &rows, const int &cols) {
  // Weight of sample `k` in a window of `n` samples.
  const auto weight = [](int k, int n) {
    return 0.5 * (1.0 - cos(2.0 * CV_PI * static_cast<double>(k) / static_cast<double>(n - 1)));
  };

  if (rows == 1 && cols == 1) {
    dst.at<float>(0, 0) = 1;
    return;
  }
  if (rows == 1 && cols > 1) {
    float* line = dst.ptr<float>(0);
    for (int c = 0; c < cols; c++) {
      line[c] = weight(c, cols);
    }
    return;
  }
  if (rows > 1 && cols == 1) {
    for (int r = 0; r < rows; r++) {
      float* line = dst.ptr<float>(r);
      line[0] = weight(r, rows);
    }
    return;
  }

  for (int r = 0; r < rows; r++) {
    float* line = dst.ptr<float>(r);
    const double row_weight = weight(r, rows);
    for (int c = 0; c < cols; c++) {
      const double col_weight = weight(c, cols);
      line[c] = static_cast<float>(row_weight * col_weight);
    }
  }
  sqrt(dst, dst);
}

// Double-precision counterpart of CreateHanningWindowWithCV_32F: fill the
// double matrix `dst` (rows x cols, already allocated) with Hann window weights.
//  - 1x1: the single element is set to 1.
//  - one row or one column: plain 1-D weights 0.5 * (1 - cos(2*pi*k / (n - 1))).
//  - otherwise: product of row and column weight, followed by an element-wise
//    square root over the whole matrix.
void CreateHanningWindowWithCV_64F(Mat dst, const int &rows, const int &cols) {
  // Weight of sample `k` in a window of `n` samples.
  const auto weight = [](int k, int n) {
    return 0.5 * (1.0 - cos(2.0 * CV_PI * static_cast<double>(k) / static_cast<double>(n - 1)));
  };

  if (rows == 1 && cols == 1) {
    dst.at<double>(0, 0) = 1;
    return;
  }
  if (rows == 1 && cols > 1) {
    double* line = dst.ptr<double>(0);
    for (int c = 0; c < cols; c++) {
      line[c] = weight(c, cols);
    }
    return;
  }
  if (rows > 1 && cols == 1) {
    for (int r = 0; r < rows; r++) {
      double* line = dst.ptr<double>(r);
      line[0] = weight(r, rows);
    }
    return;
  }

  for (int r = 0; r < rows; r++) {
    double* line = dst.ptr<double>(r);
    const double row_weight = weight(r, rows);
    for (int c = 0; c < cols; c++) {
      const double col_weight = weight(c, cols);
      line[c] = static_cast<double>(row_weight * col_weight);
    }
  }
  sqrt(dst, dst);
}

// Allocate `_dst` as a winSize matrix of the given type and fill it with Hann
// window weights. Only CV_32FC1 and CV_64FC1 are accepted (CV_Assert).
void myCreateHanningWindow(OutputArray _dst, cv::Size winSize, int type) {
  CV_Assert(type == CV_32FC1 || type == CV_64FC1);
  _dst.create(winSize, type);

  // Header sharing the freshly allocated buffer; the fill helpers write
  // through it.
  Mat window = _dst.getMat();
  const int rows = window.rows;
  const int cols = window.cols;
  const bool single_precision = (window.depth() == CV_32F);
  if (single_precision) {
    CreateHanningWindowWithCV_32F(window, rows, cols);
    return;
  }
  CreateHanningWindowWithCV_64F(window, rows, cols);
}

// Build a 2-D Hann window as the outer product of two 1-D Hann windows.
//   winSize - size of the result: winSize.height rows by winSize.width columns
//   type    - element type, CV_32FC1 or CV_64FC1 (checked by
//             myCreateHanningWindow). It is now honoured instead of being
//             silently replaced by CV_32FC1.
// For a square CV_32FC1 window the result is unchanged from the previous
// implementation; for non-square sizes rows/columns now follow winSize.
cv::Mat createMulHanningWindow(const cv::Size &winSize, int type) {
  // Column vector: winSize.height x 1. myCreateHanningWindow allocates it, so
  // no pre-sized matrix is needed.
  cv::Mat column_window;
  myCreateHanningWindow(column_window, cv::Size(1, winSize.height), type);

  // Row vector: 1 x winSize.width.
  cv::Mat row_window;
  myCreateHanningWindow(row_window, cv::Size(winSize.width, 1), type);

  // (height x 1) * (1 x width) -> height x width.
  cv::Mat mulHanning = column_window * row_window;
  return mulHanning;
}

// Load both MindIR graphs and build them for the Ascend310 device selected by
// FLAGS_device_id, then check that the first video has at least one frame.
//   model1 - built from FLAGS_model_path1
//   model2 - built from FLAGS_model_path2
// Returns 0 on success and 1 on any failure (a message is printed).
int Build(Model *model1,  Model *model2) {
  // Validate both model paths up front, before any device work is done.
  if (RealPath(FLAGS_model_path1).empty() || RealPath(FLAGS_model_path2).empty()) {
    std::cout << "Invalid model" << std::endl;
    return 1;
  }
  auto context = std::make_shared<Context>();
  auto ascend310_info = std::make_shared<mindspore::Ascend310DeviceInfo>();
  ascend310_info->SetDeviceID(FLAGS_device_id);
  context->MutableDeviceInfo().push_back(ascend310_info);

  // load  graph1
  Graph graph1;
  const Status ret = Serialization::Load(FLAGS_model_path1, ModelType::kMindIR, &graph1);
  if (ret != kSuccess) {
    std::cout << "Load model failed." << std::endl;
    return 1;
  }
  // Reported only after the status check so a failed load is never announced
  // as a success.
  std::cout << "Load model success" << std::endl;

  const Status ret_build = model1->Build(GraphCell(graph1), context);
  if (ret_build != kSuccess) {
    std::cout << "ERROR: Build failed." << std::endl;
    return 1;
  }
  std::cout << "Build success  " << std::endl;

  // load graph2
  Graph graph2;
  const Status ret_graph2 = Serialization::Load(FLAGS_model_path2, ModelType::kMindIR, &graph2);
  if (ret_graph2 != kSuccess) {
    std::cout << " load graph2 failed" << std::endl;
    return 1;
  }
  std::cout << " load graph2 Success" << std::endl;

  const Status ret_build2 = model2->Build(GraphCell(graph2), context);
  if (ret_build2 != kSuccess) {
    std::cout << " build graph2 failed" << std::endl;
    return 1;
  }
  std::cout << " build graph2 Success" << std::endl;

  // Sanity check of the dataset location using the first video only.
  if (all_videos.empty() || GetAllFiles(FLAGS_seq_root_path, all_videos[0]).empty()) {
    std::cout << "ERROR: no input data." << std::endl;
    return 1;
  }
  return 0;
}

int main(int argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  Model model1;
  Model model2;
  if (Build(&model1, &model2) != 0) {
    cout << "Build model failed." << endl;
    return 1;
  }
  int jogging_count = 1;
  std::map<double, double> costTime_map;
  size_t size_v = all_videos.size();
  for (size_t i = 0; i < size_v; ++i) {
    param config;
    std::vector<MSTensor> inputs_exemplar;
    std::vector<MSTensor> outputs_exemplar;
    struct timeval start, end;
    double startTime_ms, endTime_ms, useTime_ms;
    gettimeofday(&start, NULL);
    getExemplar(all_videos[i], &outputs_exemplar, &inputs_exemplar, &model1, &config, jogging_count);
    cout << "record:" << config.record_name << "  " << config.record_times << endl;
    gettimeofday(&end, NULL);
    costTime_map.insert(std::pair<double, double>(startTime_ms, endTime_ms));
    ofstream outfile_record;
    ofstream outfile_times;
    outfile_times.open(config.record_times);
    outfile_record.open(config.record_name);
    startTime_ms = (1.0 * start.tv_sec * 1000000 + start.tv_usec) / 1000;
    endTime_ms = (1.0 * end.tv_sec * 1000000 + end.tv_usec) / 1000;
    useTime_ms = endTime_ms - startTime_ms;
    outfile_times << useTime_ms << std::endl;
    outfile_record << config.box[0] << "," << config.box[1] << "," << config.box[2] << "," << config.box[3] << endl;
    cv::Mat hann;
    hann = createMulHanningWindow(cv::Size(16 * 17, 16 * 17), CV_32FC1);
    Scalar sum_hann = sum(hann);
    cv::Mat cos_window = hann / sum_hann[0];  // create hanning
    // load graph2
    for (size_t j = 1; j < config.size_s; j++) {
      gettimeofday(&start, NULL);
      getRetInstance(j, outputs_exemplar, cos_window, &config, model2);
      gettimeofday(&end, NULL);
      costTime_map.insert(std::pair<double, double>(startTime_ms, endTime_ms));
      startTime_ms = (1.0 * start.tv_sec * 1000000 + start.tv_usec) / 1000;
      endTime_ms = (1.0 * end.tv_sec * 1000000 + end.tv_usec) / 100;
      useTime_ms = endTime_ms - startTime_ms;
      outfile_times << useTime_ms << std::endl;
      outfile_record << config.box[0] << "," << config.box[1] << "," << config.box[2] << "," << config.box[3] << endl;
    }
    if (all_videos[i] == "Jogging" && jogging_count == 1) {
      i--;
      jogging_count++;
    }
  }
  double average = 0.0;
  int infer_cnt = 0;
  for (auto iter = costTime_map.begin(); iter != costTime_map.end(); iter++) {
    double diff = 0.0;
    diff = iter->second - iter->first;
    average += diff;
    infer_cnt++;
  }

  average = average / infer_cnt;

  std::stringstream timeCost;
  timeCost << "NN inference cost average time: " << average << " ms of infer_count " << infer_cnt << std::endl;
  std::cout << "NN inference cost average time: " << average << "ms of infer_count " << infer_cnt << std::endl;
  std::string file_name = "./time_Result" + std::string("/test_perform_static.txt");
  std::ofstream file_stream(file_name.c_str(), std::ios::trunc);
  file_stream << timeCost.str();
  file_stream.close();
  costTime_map.clear();
  return 0;
}