// Field of caffe.LayerParameter that carries the CenternetOutput layer's
// settings (the prototxt parser looks it up by the name
// "centernet_output_param").
// NOTE(review): 209 is the field tag; confirm it does not collide with another
// tag already used in your LayerParameter.
optional CenternetOutputParameter centernet_output_param = 209;
// Settings of the CenternetOutput layer.
//
// The message must be named CenternetOutputParameter: that is the type used by
// the `centernet_output_param` field of LayerParameter and by the generated
// C++ class the layer code refers to.
message CenternetOutputParameter {
  // Number of classes that are actually predicted. Required!
  optional uint32 num_classes = 1;
  // Side length of the square window used for the local-maximum (peak) test
  // on the heat map.
  optional uint32 kernel_size = 2 [default = 3];
  // Minimum sigmoid score a peak needs in order to be reported.
  optional float vis_threshold = 3 [default = 0.3];
  // NOTE(review): the three fields below are not read by the layer code that
  // accompanies this message; they are kept for compatibility.
  optional bool apply_nms = 4 [default = false];
  optional uint32 feature_map_h = 5 [default = 0];
  optional uint32 feature_map_w = 6 [default = 0];
}
# CenterNet post-processing layer: takes three bottoms and emits one top.
layer {
  name: "centernet_output"
  type: "CenternetOutput"
  # The layer reads its bottoms by position:
  #   bottom[0] = heat map, bottom[1] = wh, bottom[2] = reg.
  bottom: "conv_blob55"
  bottom: "conv_blob57"
  bottom: "conv_blob59"
  top: "result_out"
  centernet_output_param {
    # num_classes is mandatory; the layer aborts in LayerSetUp without it.
    num_classes: 2
    kernel_size: 3
    vis_threshold: 0.3
  }
}
必须先把上面的字段和 message 加入 caffe.proto 并重新编译 Caffe,否则加载 prototxt 时会报错:Error parsing text-format caffe.NetParameter: 2715:26: Message type "caffe.LayerParameter" has no field named "centernet_output_param"
修改 caffe.proto 之后,重新编译前最好先执行 make clean。
/// @brief Backward pass on the CPU. Not implemented: the body only invokes
///        the NOT_IMPLEMENTED macro.
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  NOT_IMPLEMENTED;
}

/// @brief Backward pass on the GPU. Not implemented: the body only invokes
///        the NOT_IMPLEMENTED macro.
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  NOT_IMPLEMENTED;
}
#ifndef CAFFE_CENTERNET_OUTPUT_LAYER_H
#define CAFFE_CENTERNET_OUTPUT_LAYER_H

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

// Axis-aligned box given by two corners.
struct Box{
    float x1;
    float y1;
    float x2;
    float y2;
};

// One detection record. The GPU forward pass writes these records directly
// into a float buffer, so the member order and types matter: four floats,
// one int, one float.
struct Detection{
    //x1 y1 x2 y2
    Box bbox;
    int classId;
    float prob;
};

namespace caffe {

/**
 * @brief Combine CenterNet (hm|wh|reg) layers to BoxOutput
 *
 * Bottoms are read by position: bottom[0] is the heat map, bottom[1] is wh,
 * bottom[2] is reg. The single top holds one 6-value row per detection:
 * [class, score, xmin, ymin, xmax, ymax].
 */
template <typename Dtype>
class CenternetOutputLayer : public Layer<Dtype> {
 public:
  explicit CenternetOutputLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  // Returns the layer type name.
  virtual inline const char* type() const { return "CenternetOutput"; }
  // Exactly three bottom blobs are required and exactly one top blob is
  // produced.
  virtual inline int ExactNumBottomBlobs() const { return 3; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  /// @brief Not implemented
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
    NOT_IMPLEMENTED;
  }
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
    NOT_IMPLEMENTED;
  }

  int classes_;       // num_classes from the layer parameter
  int kernel_size_;   // kernel_size from the layer parameter
  float vis_thresh_;  // vis_threshold from the layer parameter
  int h_;             // heat-map height (shape[2] of bottom[0])
  int w_;             // heat-map width  (shape[3] of bottom[0])
  // Scratch buffer for the GPU forward pass: the first float holds the
  // detection count, the detection records follow after one record-sized gap.
  Blob<float> output_bufs_;
};

}  // namespace caffe

#endif  // CAFFE_CENTERNET_OUTPUT_LAYER_H
#include <vector> #include "caffe/layers/centernet_output_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { double sigmoid(double p){ return 1.0 / (1 + exp(-p * 1.0)); } template <typename Dtype> void CenternetOutputLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { const CenternetOutputParameter& centernet_output_param = this->layer_param_.centernet_output_param(); CHECK(centernet_output_param.has_num_classes()) << "Must specify num_classes"; classes_ = centernet_output_param.num_classes(); kernel_size_ = centernet_output_param.kernel_size(); vis_thresh_ = centernet_output_param.vis_threshold(); // 这里的第一维度的第一个数字代表数量,其余5个置为0,从第二个维度开始实际存结果(存100个),因此1+100 output_bufs_.Reshape(101,6,1,1); } template <typename Dtype> void CenternetOutputLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { CHECK_EQ(bottom.size(), 3); Blob<Dtype>* hm_blob = bottom[0]; vector<int> hm_shape = hm_blob->shape(); h_ = hm_shape[2]; w_ = hm_shape[3]; vector<int> top_shape; // Since the number of bboxes to be kept is unknown before nms, we manually // set it to (fake) 1. 
top_shape.push_back(101); // Each row is a 6 dimension vector, which stores // [class, score, xmin, ymin, xmax, ymax] top_shape.push_back(6); top[0]->Reshape(top_shape); } template <typename Dtype> void CenternetOutputLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { const Dtype* hm_data = bottom[0]->cpu_data(); vector<int> hm_shape = bottom[0]->shape(); const Dtype* wh_data = bottom[1]->cpu_data(); const Dtype* reg_data = bottom[2]->cpu_data(); int classes = classes_; int feature_size = hm_shape[2]*hm_shape[3]; vector<vector<float>> fscore_max; for(int j=0; j < classes; j++) //class { for(int k=0; k < feature_size; k++) { vector<float> temp_inx; temp_inx.push_back(j*feature_size+k); // 位置 temp_inx.push_back(hm_data[j*feature_size+k]); // 分数 fscore_max.push_back(temp_inx); } } std::sort(fscore_max.begin(), fscore_max.end(),[](const std::vector<float>& a, const std::vector<float>& b){ return a[1] > b[1];}); // get top 100 int iters = std::min<int>(fscore_max.size(), 10); int only_threshbox=0; for(int i=0;i<iters;i++) { // 这里根据阈值进行筛选 fscore_max[i][1] = sigmoid(fscore_max[i][1]); if(fscore_max[i][1]<vis_thresh_) { break; } only_threshbox++; } vector<vector<float>> boxes; for(int i = 0; i < only_threshbox;i++) { vector<float> box; int index = ((int)fscore_max[i][0]) / (hm_shape[2] * hm_shape[3]); int center_index = ((int)fscore_max[i][0]) % (hm_shape[2]*hm_shape[3]) - hm_shape[3]; int cls = index; float xs= center_index % hm_shape[3]; float ys= int(center_index / hm_shape[3] ) % hm_shape[2]; //reg batch 1 xs += reg_data[(int)(((int)ys)*hm_shape[3] + xs)]; ys += reg_data[(int)(hm_shape[3]*hm_shape[2]+((int)ys)*hm_shape[3]+xs)]; float w = wh_data[(int)(((int)ys)*hm_shape[3]+xs)]; float h = wh_data[(int)(hm_shape[2]*hm_shape[3]+((int)ys)*hm_shape[3]+xs)]; box.push_back((float)cls); box.push_back((float)fscore_max[i][1]); box.push_back((float)(xs-w/2.0)); box.push_back((float)(ys-h/2.0)); box.push_back((float)(xs+w/2.0)); 
box.push_back((float)(ys+h/2.0)); // 输出四个点 boxes.push_back(box); } vector<int> top_shape; top_shape.push_back(boxes.size()); // Each row is a 6 dimension vector, which stores // [class, score, xmin, ymin, xmax, ymax] top_shape.push_back(6); Dtype* top_data; top[0]->Reshape(top_shape); top_data = top[0]->mutable_cpu_data(); printf("boxes size: %d \n", boxes.size()); for(int i = 0; i < boxes.size(); i ++) { vector<float> box = boxes[i]; top_data[i * 6] = box[0]; top_data[i * 6 + 1] = box[1]; top_data[i * 6 + 2] = box[2]; top_data[i * 6 + 3] = box[3]; top_data[i * 6 + 4] = box[4]; top_data[i * 6 + 5] = box[5]; } } #ifdef CPU_ONLY STUB_GPU_FORWARD(CenternetOutputLayer, Forward); #endif INSTANTIATE_CLASS(CenternetOutputLayer); REGISTER_LAYER_CLASS(CenternetOutput); } // namespace caffe
GPU 版本的实现如下,后处理部分借鉴了 GitHub 项目 https://github.com/CaoWGG/TensorRT-CenterNet 。
#include <algorithm>
#include <vector>

#include "caffe/layers/centernet_output_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

#ifndef BLOCK
#define BLOCK 512
#endif

/// Logistic function 1 / (1 + e^-x), evaluated on the device.
template <typename Dtype>
__device__ float Logist(Dtype data) {
  return 1. / (1. + exp(-data));
}

/**
 * @brief One thread per heat-map cell: emits a Detection for every cell whose
 *        score exceeds visthresh and that is the peak of its window.
 *
 * @param hm          heat map logits, laid out as classes x h x w
 * @param reg         centre offsets: x plane followed by y plane
 * @param wh          box sizes: w plane followed by h plane
 * @param output      result buffer; output[0] is the running detection count,
 *                    Detection records start one record-size into the buffer
 * @param w,h         heat-map width and height
 * @param classes     number of heat-map channels to scan
 * @param kernel_size side length of the peak-test window
 * @param visthresh   minimum sigmoid score
 * @param max_det     number of Detection slots available in output; further
 *                    detections are counted but not stored
 */
template <typename Dtype>
__global__ void CTdetforward_kernel(const Dtype* hm, const Dtype* reg,
    const Dtype* wh, float* output, const int w, const int h,
    const int classes, const int kernel_size, const float visthresh,
    const int max_det) {
  const int idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (idx >= w * h * classes) return;

  const float objProb = Logist(hm[idx]);
  if (objProb <= visthresh) return;

  const int padding = (kernel_size - 1) / 2;
  const int offset = -padding;
  const int stride = w * h;
  const int grid_x = idx % w;
  const int grid_y = (idx / w) % h;
  const int cls = idx / w / h;
  const int reg_index = idx - cls * stride;

  // Find the first cell with the highest score inside the window; cells
  // outside the map count as -1.
  float max = -1;
  int max_index = 0;
  for (int l = 0; l < kernel_size; ++l) {
    for (int m = 0; m < kernel_size; ++m) {
      const int cur_x = offset + l + grid_x;
      const int cur_y = offset + m + grid_y;
      const int cur_index = cur_y * w + cur_x + stride * cls;
      const bool valid = (cur_x >= 0 && cur_x < w && cur_y >= 0 && cur_y < h);
      const float val = valid ? Logist(hm[cur_index]) : -1;
      if (val > max) {
        max = val;
        max_index = cur_index;
      }
    }
  }
  if (idx != max_index) return;

  // Atomically reserve a slot: output[0] holds the number of detections so
  // far and atomicAdd returns the value before the increment.
  const int resCount = static_cast<int>(atomicAdd(output, 1.0f));
  // The buffer only has max_det slots; writing further would run past its end.
  if (resCount >= max_det) return;

  // Skip the first record-sized chunk (it holds the counter), then index.
  Detection* det = reinterpret_cast<Detection*>(output) + 1 + resCount;
  const float c_x = grid_x + reg[reg_index];
  const float c_y = grid_y + reg[reg_index + stride];
  // The factor 4 maps heat-map coordinates to the larger output scale.
  // NOTE(review): presumably the network's down-sampling ratio - confirm.
  det->bbox.x1 = (c_x - wh[reg_index] / 2) * 4;
  det->bbox.y1 = (c_y - wh[reg_index + stride] / 2) * 4;
  det->bbox.x2 = (c_x + wh[reg_index] / 2) * 4;
  det->bbox.y2 = (c_y + wh[reg_index + stride] / 2) * 4;
  det->classId = cls;
  det->prob = objProb;
}

/**
 * @brief Decodes CenterNet outputs into boxes on the GPU.
 *
 * Launches CTdetforward_kernel over every heat-map cell, copies the result
 * buffer to the host and writes top[0] as (num_detections, 6) rows of
 * [class, score, xmin, ymin, xmax, ymax]. At most as many detections as fit
 * into output_bufs_ are returned.
 */
template <typename Dtype>
void CenternetOutputLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LOG(INFO) << "begin gpu";
  const Dtype* hm_data = bottom[0]->gpu_data();
  const Dtype* wh_data = bottom[1]->gpu_data();
  const Dtype* reg_data = bottom[2]->gpu_data();

  // Reset the detection counter stored in the first float of the buffer.
  float* output_bufs = output_bufs_.mutable_gpu_data();
  caffe_gpu_memset(sizeof(float), 0, output_bufs);

  // Number of Detection records that fit behind the counter record.
  const int max_det = static_cast<int>(
      output_bufs_.count() * sizeof(float) / sizeof(Detection)) - 1;

  const int num = w_ * h_ * classes_;
  if (num > 0) {  // a launch with zero blocks is an invalid configuration
    const int nbrBlocks = (num + BLOCK - 1) / BLOCK;
    // NOLINT_NEXT_LINE(whitespace/operators)
    CTdetforward_kernel<<<nbrBlocks, BLOCK>>>(hm_data, reg_data, wh_data,
        output_bufs, w_, h_, classes_, kernel_size_, vis_thresh_, max_det);
    // Check the launch right away so a failure is reported here and not at
    // some later CUDA call.
    CUDA_POST_KERNEL_CHECK;
  }

  // Requesting cpu_data() brings the buffer back to the host.
  const float* output_bufs_results = output_bufs_.cpu_data();
  // The counter keeps increasing past max_det; only max_det records exist.
  const int num_det =
      std::min(static_cast<int>(output_bufs_results[0]), max_det);
  LOG(INFO) << "num_det: " << num_det;

  vector<int> top_shape;
  top_shape.push_back(num_det);
  top_shape.push_back(6);
  top[0]->Reshape(top_shape);
  Dtype* top_data = top[0]->mutable_cpu_data();

  // Read the records through the Detection type: classId is stored as an int,
  // so reading the buffer as plain floats would misinterpret its bits.
  const Detection* dets =
      reinterpret_cast<const Detection*>(output_bufs_results) + 1;
  for (int i = 0; i < num_det; ++i) {
    const Detection& det = dets[i];
    Dtype* row = top_data + i * 6;
    row[0] = static_cast<Dtype>(det.classId);
    row[1] = det.prob;
    row[2] = det.bbox.x1;
    row[3] = det.bbox.y1;
    row[4] = det.bbox.x2;
    row[5] = det.bbox.y2;
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(CenternetOutputLayer);

}  // namespace caffe
/**
 * @brief Runs the network on one image and returns the detected boxes.
 *
 * The network input is reshaped to the size of @p img, the image is
 * preprocessed, and the first output blob is read as rows of
 * [class, score, xmin, ymin, xmax, ymax]. Timing information is printed to
 * stdout and an annotated copy of the image is written to
 * "image_result.jpg".
 *
 * @param img input image; it is not modified (boxes are drawn on a copy).
 * @return one 6-element vector per detection, in the row layout above.
 */
std::vector<vector<float> > CenterNet_Detector::Detect(const cv::Mat& img) {
  // Number of forward passes to run; raise it to repeat the timing.
  int count = 1;
  std::vector<std::vector<float> > rlt;

  Blob<float>* input_layer = net_->input_blobs()[0];
  typedef std::chrono::duration<double, std::ratio<1, 1000> > ms;
  auto total_t0 = std::chrono::high_resolution_clock::now();

  auto t0 = std::chrono::high_resolution_clock::now();
  // Size the network input after the actual image.
  input_layer->Reshape(1, num_channels_, img.rows, img.cols);
  input_geometry_ = cv::Size(input_layer->width(), input_layer->height());
  net_->Reshape();
  auto t1 = std::chrono::high_resolution_clock::now();
  double reshape_time = std::chrono::duration_cast<ms>(t1 - t0).count();
  std::cout << "Caffe Reshape time: " << std::fixed << std::setprecision(2)
            << reshape_time << " ms" << std::endl;

  t0 = std::chrono::high_resolution_clock::now();
  std::vector<cv::Mat> input_channels;
  WrapInputLayer(&input_channels);
  Preprocess(img, &input_channels);
  t1 = std::chrono::high_resolution_clock::now();
  double preprocess_time = std::chrono::duration_cast<ms>(t1 - t0).count();
  std::cout << "Preprocess time: " << std::fixed << std::setprecision(2)
            << preprocess_time << " ms" << std::endl;

  while (count--) {
    t0 = std::chrono::high_resolution_clock::now();
    net_->Forward();
    t1 = std::chrono::high_resolution_clock::now();
    double net_time = std::chrono::duration_cast<ms>(t1 - t0).count();
    std::cout << "Net processing time: " << std::fixed << std::setprecision(2)
              << net_time << " ms" << std::endl;

    Blob<float>* result_blob = net_->output_blobs()[0];
    const float* result = result_blob->cpu_data();
    std::cout << "result shpae: " << result_blob->shape_string() << std::endl;
    const int num_det = result_blob->num();
    std::cout << "num_det: " << num_det << std::endl;

    // Copy every 6-value row out of the blob.
    std::vector<std::vector<float> > predictions;
    predictions.reserve(num_det);
    for (int i = 0; i < num_det; ++i) {
      const float* row = result + i * 6;
      printf("score: %f ", row[1]);
      predictions.push_back(std::vector<float>(row, row + 6));
    }

    std::cout << "boxes nums " << predictions.size() << std::endl;
    double total_time = std::chrono::duration_cast<ms>(t1 - total_t0).count();
    std::cout << "Total time: " << std::fixed << std::setprecision(2)
              << total_time << " ms" << std::endl;

    // Draw on a deep copy: a plain cv::Mat assignment shares the pixel data,
    // which would paint the rectangles onto the caller's const image.
    cv::Mat temp = img.clone();
    for (size_t i = 0; i < predictions.size(); ++i) {
      const std::vector<float>& prediction = predictions[i];
      cv::rectangle(temp,
                    cv::Point((int)(prediction[2]), (int)(prediction[3])),
                    cv::Point((int)(prediction[4]), (int)(prediction[5])),
                    cv::Scalar(0, 0, 255), 1, 1, 0);
    }
    cv::imwrite("image_result.jpg", temp);

    // Hand the detections of the latest run back to the caller.
    rlt.swap(predictions);
  }
  return rlt;
}
如果用 nvcc 编译时遇到 identifier "nullptr" is undefined 错误,需要在 Makefile 的 NVCCFLAGS 末尾加上 -std=c++11:
# -std=c++11 at the end makes nvcc compile .cu files as C++11; without it
# C++11 keywords such as nullptr are rejected as undefined identifiers.
NVCCFLAGS += -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS) -std=c++11