#include <algorithm>
#include <cmath>
#include <vector>

#include "caffe/layers/hashing_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {
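
// Loss implemented below, over all pairs (i, j) with i < j, where s_ij = 1
// for similar pairs and 0 otherwise (a transcription of Forward_cpu):
//   L = 1/(N(N-1)) * sum_{i<j} [ s_ij * ||b_i - b_j||^2
//           + (1 - s_ij) * max(bi_margin - ||b_i - b_j||^2, 0) ]
//       + tradeoff/N * sum_{i,k} | |b_ik| - 1 |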

template <typename Dtype>
void HashingLossLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  // bottom[0] holds the codes (num x channels x 1 x 1); bottom[1] the labels.
  CHECK_EQ(bottom[0]->height(), 1);
  CHECK_EQ(bottom[0]->width(), 1);
  CHECK_EQ(bottom[1]->height(), 1);
  CHECK_EQ(bottom[1]->width(), 1);
  diff_.Reshape(1, bottom[0]->channels(), 1, 1);
  // vector of ones used to sum along channels
  summer_vec_.Reshape(bottom[0]->channels(), 1, 1, 1);
  for (int i = 0; i < bottom[0]->channels(); ++i) {
    summer_vec_.mutable_cpu_data()[i] = Dtype(1);
  }
}

template <typename Dtype>
void HashingLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // The gradient w.r.t. bottom[0] is accumulated during this forward pass,
  // so Backward_cpu has nothing left to do for the data blob.
  // top[0]->cpu_diff()[0] holds the loss weight, which Caffe writes into
  // the top diff at setup time.
  Dtype* bout = bottom[0]->mutable_cpu_diff();
  const int num = bottom[0]->num();
  const Dtype alpha =
      top[0]->cpu_diff()[0] / static_cast<Dtype>(num * (num - 1));
  const Dtype beta = top[0]->cpu_diff()[0] / static_cast<Dtype>(num);
  const int channels = bottom[0]->channels();
  const Dtype margin = this->layer_param_.hashing_loss_param().bi_margin();
  const Dtype tradeoff = this->layer_param_.hashing_loss_param().tradeoff();
  // A multi-dimensional label is treated as a binary attribute vector; a
  // one-dimensional label is compared as a class index.
  const int label_num = bottom[1]->count() / num;
  bool sim;
  Dtype loss(0.0);
  Dtype reg(0.0);
  Dtype data(0.0);
  Dtype dist_sq(0.0);
  caffe_set(channels * num, Dtype(0), bout);
  // Accumulate the pairwise loss and gradients over all pairs i < j.
  for (int i = 0; i < num; ++i) {
    for (int j = i + 1; j < num; ++j) {
      caffe_sub(
          channels,
          bottom[0]->cpu_data() + (i * channels),  // b_i
          bottom[0]->cpu_data() + (j * channels),  // b_j
          diff_.mutable_cpu_data());               // b_i - b_j
      // Squared Euclidean distance D^2 = ||b_i - b_j||_2^2.
      dist_sq = caffe_cpu_dot(channels, diff_.cpu_data(), diff_.cpu_data());
      if (label_num > 1) {
        // Multi-label case: similar iff the label vectors share a bit.
        sim = caffe_cpu_dot(label_num,
                  bottom[1]->cpu_data() + (i * label_num),
                  bottom[1]->cpu_data() + (j * label_num)) > Dtype(0);
      } else {
        // Single-label case: similar iff the class indices match.
        sim = static_cast<int>(bottom[1]->cpu_data()[i]) ==
              static_cast<int>(bottom[1]->cpu_data()[j]);
      }
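      // Example: attribute labels l_i = (1, 0, 1) and l_j = (0, 1, 0) have
      // dot product 0, so that pair counts as dissimilar.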
      if (sim) {  // similar pair: pull the codes together
        loss += dist_sq;
        // gradient with respect to the first sample
        caffe_cpu_axpby(
            channels,
            alpha,
            diff_.cpu_data(),
            Dtype(1.0),
            bout + (i * channels));
        // gradient with respect to the second sample
        caffe_cpu_axpby(
            channels,
            -alpha,
            diff_.cpu_data(),
            Dtype(1.0),
            bout + (j * channels));
      } else {  // dissimilar pair: push the codes at least bi_margin apart
        loss += std::max(margin - dist_sq, Dtype(0.0));
        if ((margin - dist_sq) > Dtype(0.0)) {
          // gradient with respect to the first sample
          caffe_cpu_axpby(
              channels,
              -alpha,
              diff_.cpu_data(),
              Dtype(1.0),
              bout + (i * channels));
          // gradient with respect to the second sample
          caffe_cpu_axpby(
              channels,
              alpha,
              diff_.cpu_data(),
              Dtype(1.0),
              bout + (j * channels));
        }
      }
    }
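    // Regularizer subgradient, derived from r(b) = | |b| - 1 |:
    //   dr/db = sign(|b| - 1) * sign(b)
    //         = +1 for b >= 1 or -1 <= b <= 0, and -1 otherwise,
    // which is exactly what the ternary expression below evaluates.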
    for (int k = 0; k < channels; ++k) {
      data = *(bottom[0]->cpu_data() + (i * channels) + k);
      // gradient corresponding to the regularizer
      *(bout + (i * channels) + k) += beta * tradeoff *
          ((data >= Dtype(1.0) ||
            (data <= Dtype(0.0) && data >= Dtype(-1.0))) ?
              Dtype(1.0) : Dtype(-1.0));
      // accumulate reg += | |b_ik| - 1 |
      reg += std::abs(std::abs(data) - Dtype(1.0));
    }
  }
  // The pair sum has num * (num - 1) / 2 terms but is normalized by
  // num * (num - 1), consistent with the alpha used for the gradients.
  loss /= static_cast<Dtype>(num * (num - 1));
  loss += tradeoff * reg / static_cast<Dtype>(num);
  top[0]->mutable_cpu_data()[0] = loss;
}

template <typename Dtype>
void HashingLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  // No further work: the gradient w.r.t. bottom[0] was already written into
  // bottom[0]->cpu_diff() during Forward_cpu.
}
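
// Usage sketch in prototxt form (illustrative: the layer type string and
// the bi_margin / tradeoff parameters come from this file; blob names and
// values are assumptions):
//   layer {
//     name: "hash_loss"
//     type: "HashingLoss"
//     bottom: "codes"
//     bottom: "label"
//     top: "loss"
//     hashing_loss_param { bi_margin: 24 tradeoff: 0.01 }
//   }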

#ifdef CPU_ONLY
STUB_GPU(HashingLossLayer);
#endif

INSTANTIATE_CLASS(HashingLossLayer);
REGISTER_LAYER_CLASS(HashingLoss);

}  // namespace caffe