Caffe源码中Pooling Layer文件分析-天翼云

Caffe源码(caffe version commit: 09868ac , date: 2015.08.15)中有一些重要的头文件，这里介绍下include/caffe/vision_layers文件中PoolingLayer类，在最新版caffe中，PoolingLayer类被单独放在了include/caffe/layers/pooling_layer.hpp文件中，这两个文件中PoolingLayer类的内容及实现是完全一致的：

1． include文件：

(1)、<caffe/blob.hpp>

(2)、<caffe/layer.hpp>

(3)、<caffe/proto/caffe.pb.h>

2．类PoolingLayer：池化层，Layer类的子类

Pooling layer的主要作用是降维，缩小feature map，图像降采样，方法有：

(1)、均值采样：取区域平均值作为降采样值；

(2)、最大值采样：取区域最大值作为降采样值；

(3)、随机采样：取区域内随机一个像素值。

<caffe/layers/pooling_layer.hpp>文件的详细介绍如下：

#ifndef CAFFE_POOLING_LAYER_HPP_
#define CAFFE_POOLING_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {
/**
 * @brief Pools the input image by taking the max, average, etc. within regions.
 *
 * TODO(dox): thorough documentation for Forward, Backward, and proto params.
 */
// 池化层，Layer类的子类，图像降采样，有三种Pooling方法：Max、Avx、Stochastic
template <typename Dtype>
class PoolingLayer : public Layer<Dtype> {
 public:
// 显示构造函数
  explicit PoolingLayer(const LayerParameter& param) : Layer<Dtype>(param) {}
// 参数初始化，通过类PoolingParameter获取成员变量值，包括：
// global_pooling_、kernel_h_、kernel_w_、pad_h_、pad_w_、stride_h_、stride_w_
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
// 调整top blobs的shape，并有可能会reshape rand_idx_或max_idx_；
// 获取成员变量值，包括：channels_、height_、width_、pooled_height_、pooled_width_
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
// 获得Pooling layer的类型: Pooling
  virtual inline const char* type() const { return "Pooling"; }
// 获得Pooling layer所需的bottom blobs的个数: 1
  virtual inline int ExactNumBottomBlobs() const { return 1; }
// 获得Pooling layer所需的bottom blobs的最少个数: 1
  virtual inline int MinTopBlobs() const { return 1; }
  // MAX POOL layers can output an extra top blob for the mask;
  // others can only output the pooled inputs.
// 获得Pooling layer所需的bottom blobs的最多个数: Max为2，其它(Avg, Stochastic)为1
  virtual inline int MaxTopBlobs() const {
    return (this->layer_param_.pooling_param().pool() ==
            PoolingParameter_PoolMethod_MAX) ? 2 : 1;
  }

 protected:
// CPU实现Pooling layer的前向传播，仅有Max和Ave两种方法实现
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
// GPU实现Pooling layer的前向传播，Max、Ave、Stochastic三种方法实现
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
// CPU实现Pooling layer的反向传播，仅有Max和Ave两种方法实现
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
// GPU实现Pooling layer的反向传播，Max、Ave、Stochastic三种方法实现
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

// Caffe中类的成员变量名都带有后缀"_"，这样就容易区分临时变量和类成员变量
  int kernel_h_, kernel_w_; // 滤波器(卷积核)大小
  int stride_h_, stride_w_; // 步长大小
  int pad_h_, pad_w_; // 图像扩充大小
  int channels_; // 图像通道数
  int height_, width_; // 图像高、宽
// 池化后图像高、宽
// pooled_height_ = (height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1
// pooled_width_ = (width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1
  int pooled_height_, pooled_width_;
  bool global_pooling_; // 是否全区域池化(将整幅图像降采样为1*1)
  Blob<Dtype> rand_idx_; // 随机采样索引,Pooling方法为STOCHASTIC时用到并会Reshape
  Blob<int> max_idx_; // 最大值采样索引，Pooling方法为MAX时用到并会Reshape
};

}  // namespace caffe

#endif  // CAFFE_POOLING_LAYER_HPP_

在caffe.proto文件中，有一个message是与pooling layer 相关的，如下：

message PoolingParameter { // Pooling层参数类
  enum PoolMethod { // 枚举类型，Pooling的方法：Max(最大值采样)、AVE(均值采样)、STOCHASTIC(随机采样)
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  optional PoolMethod pool = 1 [default = MAX]; // The pooling method, pooling方法
  // Pad, kernel size, and stride are all given as a single value for equal
  // dimensions in height and width or as Y, X pairs.
  optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X)，图像扩充大小(添加图像边界的像素大小)
  optional uint32 pad_h = 9 [default = 0]; // The padding height,图像扩充大小，Y
  optional uint32 pad_w = 10 [default = 0]; // The padding width，图像扩充大小，X
  optional uint32 kernel_size = 2; // The kernel size (square)，滤波器(卷积核、滑动窗)的大小(高=宽)
  optional uint32 kernel_h = 5; // The kernel height，滤波器(卷积核、滑动窗)的高
  optional uint32 kernel_w = 6; // The kernel width，滤波器(卷积核、滑动窗)的宽
  optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X),滑动步长(高=宽)，卷积核卷积时平移的步幅
  optional uint32 stride_h = 7; // The stride height,滑动步长，高
  optional uint32 stride_w = 8; // The stride width，滑动步长，宽
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 11 [default = DEFAULT]; //
  // If global_pooling then it will pool over the size of the bottom by doing
  // kernel_h = bottom->height and kernel_w = bottom->width
  optional bool global_pooling = 12 [default = false]; // 是否是全区域池化
}

pooling layer的测试代码如下：

#include "funset.hpp"
#include <string>
#include <vector>
#include "common.hpp"

int test_caffe_layer_pooling()
{
	caffe::Caffe::set_mode(caffe::Caffe::CPU); // set run caffe mode

	// set layer parameter
	caffe::LayerParameter layer_param;
	layer_param.set_phase(caffe::Phase::TRAIN);

	// cv::Mat -> caffe::Blob
	std::string image_name = "E:/GitCode/Caffe_Test/test_data/images/a.jpg";
	cv::Mat mat1 = cv::imread(image_name, 1);
	if (!mat1.data) {
		fprintf(stderr, "read image fail: %s\n", image_name.c_str());
		return -1;
	}
	mat1.convertTo(mat1, CV_32FC3);
	std::vector<cv::Mat> mat2;
	cv::split(mat1, mat2);
	std::vector<int> mat_reshape{ 1, (int)mat2.size(), mat2[0].rows, mat2[0].cols };

	caffe::Blob<float> blob;
	blob.Reshape(mat_reshape);
	size_t size = mat2[0].rows * mat2[0].cols;
	float* data = new float[mat2.size() * size];
	memcpy(data, mat2[0].data, size * sizeof(float));
	memcpy(data + size, mat2[1].data, size * sizeof(float));
	memcpy(data + 2 * size, mat2[2].data, size * sizeof(float));
	blob.set_cpu_data(data);

	for (int method = 0; method < 2; ++method) {
		// set pooling parameter
		caffe::PoolingParameter* pooling_param = layer_param.mutable_pooling_param();
		if (method == 0) pooling_param->set_pool(caffe::PoolingParameter::MAX);
		else pooling_param->set_pool(caffe::PoolingParameter::AVE);
		pooling_param->set_kernel_size(3);
		pooling_param->set_pad(2);
		pooling_param->set_stride(2);
		pooling_param->set_global_pooling(false);

		std::vector<caffe::Blob<float>*> bottom_blob{ &blob }, top_blob{ &caffe::Blob<float>()/*, &caffe::Blob<float>() */ };

		// test PoolingLayer function
		caffe::PoolingLayer<float> pooling_layer(layer_param);
		pooling_layer.SetUp(bottom_blob, top_blob);
		fprintf(stderr, "top blob info: channels: %d, height: %d, width: %d\n",
			top_blob[0]->channels(), top_blob[0]->height(), top_blob[0]->width());

		pooling_layer.Forward(bottom_blob, top_blob);

		int height = top_blob[0]->height();
		int width = top_blob[0]->width();
		const float* p = top_blob[0]->cpu_data();
		std::vector<cv::Mat> mat3{ cv::Mat(height, width, CV_32FC1, (float*)p),
			cv::Mat(height, width, CV_32FC1, (float*)(p + height * width)),
			cv::Mat(height, width, CV_32FC1, (float*)(p + height * width * 2)) };
		cv::Mat mat4;
		cv::merge(mat3, mat4);
		mat4.convertTo(mat4, CV_8UC3);
		if (method == 0) image_name = "E:/GitCode/Caffe_Test/test_data/images/forward0.jpg";
		else image_name = "E:/GitCode/Caffe_Test/test_data/images/forward1.jpg";
		cv::imwrite(image_name, mat4);

		for (int i = 0; i < bottom_blob[0]->count(); ++i)
			bottom_blob[0]->mutable_cpu_diff()[i] = bottom_blob[0]->cpu_data()[i];
		for (int i = 0; i < top_blob[0]->count(); ++i)
			top_blob[0]->mutable_cpu_diff()[i] = top_blob[0]->cpu_data()[i];

		std::vector<bool> propagate_down{ true };
		pooling_layer.Backward(top_blob, propagate_down, bottom_blob);

		height = bottom_blob[0]->height();
		width = bottom_blob[0]->width();
		p = bottom_blob[0]->cpu_diff();
		std::vector<cv::Mat> mat5{ cv::Mat(height, width, CV_32FC1, (float*)p),
			cv::Mat(height, width, CV_32FC1, (float*)(p + height * width)),
			cv::Mat(height, width, CV_32FC1, (float*)(p + height * width * 2)) };
		cv::Mat mat6;
		cv::merge(mat5, mat6);
		mat6.convertTo(mat6, CV_8UC3);
		if (method == 0) image_name = "E:/GitCode/Caffe_Test/test_data/images/backward0.jpg";
		else image_name = "E:/GitCode/Caffe_Test/test_data/images/backward1.jpg";
		cv::imwrite(image_name, mat6);
	}

	delete[] data;
	return 0;
}

图像结果(Lena.jpg)如下：前向传播、反向传播，前两幅为Max，后两幅为Avg方法

智算服务

应用商城

合作伙伴

开发者

支持与服务

了解天翼云

Caffe源码中Pooling Layer文件分析

Caffe源码中Pooling Layer文件分析

相关文章

iOS开发CoreAnimation解读之三——几种常用Layer的使用解析

jquery ajax 使用layer的超时提示

Layui 内置方法 - layer.tips(tips层 )

Layui 内置方法 - layer.tab(tab层)

作者介绍

最新文章

iOS开发CoreAnimation解读之三——几种常用Layer的使用解析

热门文章

iOS开发CoreAnimation解读之三——几种常用Layer的使用解析

热门标签

相关产品

弹性云主机

天翼云电脑（公众版）

对象存储

云硬盘

随机文章

iOS开发CoreAnimation解读之三——几种常用Layer的使用解析

活动

智算服务

应用商城

合作伙伴

开发者

支持与服务

了解天翼云

Caffe源码中Pooling Layer文件分析

Caffe源码中Pooling Layer文件分析

相关文章

iOS开发CoreAnimation解读之三——几种常用Layer的使用解析

jquery ajax 使用layer的超时提示

Layui 内置方法 - layer.tips(tips层 )

Layui 内置方法 - layer.tab(tab层)

作者介绍

最新文章

iOS开发CoreAnimation解读之三——几种常用Layer的使用解析

热门文章

iOS开发CoreAnimation解读之三——几种常用Layer的使用解析

热门标签

相关产品

弹性云主机

天翼云电脑（公众版）

对象存储

云硬盘

随机文章

iOS开发CoreAnimation解读之三——几种常用Layer的使用解析