How to set up loss and activation functions in Caffe

Caffe's wrappers for the six activation function classes -- ReLU
The previous post introduced six activation functions; this post walks through how each of them is implemented in Caffe, in the same order: ReLU, Sigmoid, TanH, AbsVal, Power, BNLL.
Each activation layer is implemented in a header (.hpp) and a source file (.cpp), plus a .cu file for the GPU version; all of them are dissected below. The headers live under ~/caffe-master/include/caffe and the sources under ~/caffe-master/src/caffe.
relu_layer.hpp
#ifndef CAFFE_RELU_LAYER_HPP_
#define CAFFE_RELU_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/neuron_layer.hpp"

namespace caffe {

/**
 * @brief Rectified Linear Unit non-linearity @f$ y = \max(0, x) @f$.
 *        The simple max is fast to compute, and the function does not saturate.
 */
template <typename Dtype>
class ReLULayer : public NeuronLayer<Dtype> {
 public:
  /**
   * @param param provides ReLUParameter relu_param,
   *     with ReLULayer options:
   *   - negative_slope (\b optional, default 0).
   *     the value @f$ \nu @f$ by which negative values are multiplied.
   */
  explicit ReLULayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}

  virtual inline const char* type() const { return "ReLU"; }

 protected:
  /**
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the inputs @f$ x @f$
   * @param top output Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the computed outputs @f$
   *        y = \max(0, x)
   *      @f$ by default. If a non-zero negative_slope @f$ \nu @f$ is provided,
   *      the computed outputs are @f$ y = \max(0, x) + \nu \min(0, x) @f$.
   */
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  /**
   * @brief Computes the error gradient w.r.t. the ReLU inputs.
   *
   * @param top output Blob vector (length 1), providing the error gradient with
   *      respect to the outputs
   *   -# @f$ (N \times C \times H \times W) @f$
   *      containing error gradients @f$ \frac{\partial E}{\partial y} @f$
   *      with respect to computed outputs @f$ y @f$
   * @param propagate_down see Layer::Backward.
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the inputs @f$ x @f$; Backward fills their diff with
   *      gradients @f$
   *        \frac{\partial E}{\partial x} = \left\{
   *        \begin{array}{lr}
   *            0 & \mathrm{if} \; x \le 0 \\
   *            \frac{\partial E}{\partial y} & \mathrm{if} \; x > 0
   *        \end{array} \right.
   *      @f$ if propagate_down[0], by default.
   *      If a non-zero negative_slope @f$ \nu @f$ is provided,
   *      the computed gradients are @f$
   *        \frac{\partial E}{\partial x} = \left\{
   *        \begin{array}{lr}
   *            \nu \frac{\partial E}{\partial y} & \mathrm{if} \; x \le 0 \\
   *            \frac{\partial E}{\partial y} & \mathrm{if} \; x > 0
   *        \end{array} \right.
   *      @f$.
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
};

}  // namespace caffe

#endif  // CAFFE_RELU_LAYER_HPP_
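To make the formulas in the header comments concrete, here is a minimal standalone sketch (not part of Caffe) that applies the same forward rule y = max(0, x) + nu * min(0, x) and the corresponding gradient to a few sample values; the inputs and the slope 0.1 are arbitrary:

#include <algorithm>
#include <cstdio>

int main() {
  const float nu = 0.1f;  // negative_slope; 0 gives the plain ReLU
  const float x[4] = {-2.0f, -0.5f, 0.0f, 3.0f};
  for (int i = 0; i < 4; ++i) {
    // Forward: y = max(0, x) + nu * min(0, x)
    const float y = std::max(x[i], 0.0f) + nu * std::min(x[i], 0.0f);
    // Backward (taking dE/dy = 1): dE/dx = 1 if x > 0, nu if x <= 0
    const float dydx = (x[i] > 0.0f) ? 1.0f : nu;
    std::printf("x=%5.2f  y=%5.2f  dy/dx=%4.2f\n", x[i], y, dydx);
  }
  return 0;
}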
relu_layer.cpp
#include <algorithm>
#include <vector>

#include "caffe/layers/relu_layer.hpp"

namespace caffe {

template <typename Dtype>
void ReLULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int count = bottom[0]->count();
  Dtype negative_slope = this->layer_param_.relu_param().negative_slope();
  for (int i = 0; i < count; ++i) {
    top_data[i] = std::max(bottom_data[i], Dtype(0))
        + negative_slope * std::min(bottom_data[i], Dtype(0));
  }
}

template <typename Dtype>
void ReLULayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const int count = bottom[0]->count();
    Dtype negative_slope = this->layer_param_.relu_param().negative_slope();
    for (int i = 0; i < count; ++i) {
      bottom_diff[i] = top_diff[i] * ((bottom_data[i] > 0)
          + negative_slope * (bottom_data[i] <= 0));
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(ReLULayer);
#endif

INSTANTIATE_CLASS(ReLULayer);

}  // namespace caffe
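The snippet below is a hedged usage sketch, loosely modeled on Caffe's layer unit tests rather than copied from them: it builds a small input blob, sets negative_slope to 0.1 through the protobuf parameters, and runs the CPU forward pass. The blob shape and input values are made up for illustration.

#include <cstdio>
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layers/relu_layer.hpp"

int main() {
  using caffe::Blob;
  using caffe::LayerParameter;
  using caffe::ReLULayer;

  Blob<float> bottom(1, 1, 1, 4);
  Blob<float> top;                       // SetUp reshapes this to match bottom
  float* in = bottom.mutable_cpu_data();
  in[0] = -2.0f; in[1] = -0.5f; in[2] = 0.0f; in[3] = 3.0f;

  LayerParameter param;
  param.mutable_relu_param()->set_negative_slope(0.1f);  // leaky ReLU

  ReLULayer<float> layer(param);
  std::vector<Blob<float>*> bottom_vec(1, &bottom), top_vec(1, &top);
  layer.SetUp(bottom_vec, top_vec);
  layer.Forward(bottom_vec, top_vec);    // dispatches to Forward_cpu in CPU mode

  const float* out = top.cpu_data();
  for (int i = 0; i < top.count(); ++i) {
    std::printf("x=%5.2f -> y=%5.2f\n", in[i], out[i]);  // expect -0.20 -0.05 0.00 3.00
  }
  return 0;
}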
relu_layer.cu (the GPU implementation)
#include <algorithm>
#include <vector>

#include "caffe/layers/relu_layer.hpp"

namespace caffe {

template <typename Dtype>
__global__ void ReLUForward(const int n, const Dtype* in, Dtype* out,
    Dtype negative_slope) {
  CUDA_KERNEL_LOOP(index, n) {
    out[index] = in[index] > 0 ? in[index] : in[index] * negative_slope;
  }
}

template <typename Dtype>
void ReLULayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->gpu_data();
  Dtype* top_data = top[0]->mutable_gpu_data();
  const int count = bottom[0]->count();
  Dtype negative_slope = this->layer_param_.relu_param().negative_slope();
  // NOLINT_NEXT_LINE(whitespace/operators)
  ReLUForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, bottom_data, top_data, negative_slope);
  CUDA_POST_KERNEL_CHECK;
  // << " count: " << count << " bottom_data: "
  //     << (unsigned long)bottom_data
  //     << " top_data: " << (unsigned long)top_data
  //     << " blocks: " << CAFFE_GET_BLOCKS(count)
  //     << " threads: " << CAFFE_CUDA_NUM_THREADS;
}

template <typename Dtype>
__global__ void ReLUBackward(const int n, const Dtype* in_diff,
    const Dtype* in_data, Dtype* out_diff, Dtype negative_slope) {
  CUDA_KERNEL_LOOP(index, n) {
    out_diff[index] = in_diff[index] * ((in_data[index] > 0)
        + (in_data[index] <= 0) * negative_slope);
  }
}

template <typename Dtype>
void ReLULayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    const Dtype* bottom_data = bottom[0]->gpu_data();
    const Dtype* top_diff = top[0]->gpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
    const int count = bottom[0]->count();
    Dtype negative_slope = this->layer_param_.relu_param().negative_slope();
    // NOLINT_NEXT_LINE(whitespace/operators)
    ReLUBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
        count, top_diff, bottom_data, bottom_diff, negative_slope);
    CUDA_POST_KERNEL_CHECK;
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(ReLULayer);

}  // namespace caffe
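The kernels above rely on three Caffe helpers: CAFFE_GET_BLOCKS(count) and CAFFE_CUDA_NUM_THREADS size the launch grid, and CUDA_KERNEL_LOOP is essentially a grid-stride loop. The sketch below shows roughly what ReLUForward looks like with that macro expanded by hand (a paraphrase, not a verbatim copy of caffe/util/device_alternate.hpp):

// Each thread starts at its global index and strides by the total number of
// launched threads, so an arbitrary element count n is covered.
template <typename Dtype>
__global__ void ReLUForwardExpanded(const int n, const Dtype* in, Dtype* out,
    Dtype negative_slope) {
  for (int index = blockIdx.x * blockDim.x + threadIdx.x;
       index < n;
       index += blockDim.x * gridDim.x) {
    out[index] = in[index] > 0 ? in[index] : in[index] * negative_slope;
  }
}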
Reposted from: /lutingting/p/5240688.html
In Caffe, the structure of a network is given by a prototxt file and consists of a series of Layers. Commonly used layer types include data loading layers, convolution layers, pooling layers, non-linear transform (activation) layers, inner-product layers, normalization layers, and loss layers. This post focuses on the activation function layers.
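For orientation, that prototxt is what gets parsed when a network is instantiated from the C++ API. Below is a minimal sketch (the path "lenet.prototxt" is only a placeholder) that loads such a definition and prints the type and name of every layer it declares, activation layers included:

#include <cstdio>
#include <string>
#include <vector>

#include "caffe/net.hpp"

int main() {
  // "lenet.prototxt" is a hypothetical path to a network definition file.
  caffe::Net<float> net("lenet.prototxt", caffe::TEST);
  const std::vector<std::string>& names = net.layer_names();
  for (size_t i = 0; i < names.size(); ++i) {
    // layers()[i]->type() is the type string from the prototxt, e.g. "ReLU"
    std::printf("%-12s %s\n", net.layers()[i]->type(), names[i].c_str());
  }
  return 0;
}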
1. Overview of the activation function layer
Here is a small example of how an activation layer is configured (defined in the .prototxt file):
layer {
  name: "relu1"     # name of this layer
  type: "ReLU"      # activation function type
  bottom: "conv1"   # input blob of this layer
  top: "conv1"      # output blob of this layer
}
Note: an activation is an element-wise operation, so it can be computed in place to save memory; this is done by giving the bottom blob and the top blob the same name, as in the example above where both are "conv1".
2. Available activation function types
type: "Sigmoid":  f(x) = 1 / (1 + e^(-x))
type: "ReLU":     f(x) = max(x, 0)
type: "AbsVal":   f(x) = |x|
type: "TanH":     f(x) = (e^x - e^(-x)) / (e^x + e^(-x))
type: "BNLL":     f(x) = log(1 + exp(x))
type: "Power":    f(x) = (shift + scale * x) ^ power
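As a quick sanity check of the formulas above, the following standalone sketch (not Caffe code) evaluates each activation at a single sample point; shift, scale and power are set to Caffe's PowerParameter defaults (0, 1, 1):

#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  const double x = -0.5;                               // arbitrary sample input
  const double shift = 0.0, scale = 1.0, power = 1.0;  // PowerParameter defaults
  std::printf("Sigmoid: %f\n", 1.0 / (1.0 + std::exp(-x)));
  std::printf("ReLU   : %f\n", std::max(x, 0.0));
  std::printf("AbsVal : %f\n", std::fabs(x));
  std::printf("TanH   : %f\n", std::tanh(x));
  std::printf("BNLL   : %f\n", std::log(1.0 + std::exp(x)));
  std::printf("Power  : %f\n", std::pow(shift + scale * x, power));
  return 0;
}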
absval_layer.hpp
#ifndef CAFFE_ABSVAL_LAYER_HPP_
#define CAFFE_ABSVAL_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/neuron_layer.hpp"

namespace caffe {

/**
 * @brief Computes @f$ y = |x| @f$
 *
 * @param bottom input Blob vector (length 1)
 *   -# @f$ (N \times C \times H \times W) @f$
 *      the inputs @f$ x @f$
 * @param top output Blob vector (length 1)
 *   -# @f$ (N \times C \times H \times W) @f$
 *      the computed outputs @f$ y = |x| @f$
 */
template <typename Dtype>
class AbsValLayer : public NeuronLayer<Dtype> {
 public:
  explicit AbsValLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "AbsVal"; }
  virtual inline int ExactNumBottomBlobs() const { return 1; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  /// @copydoc AbsValLayer
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  /**
   * @brief Computes the error gradient w.r.t. the absolute value inputs.
   *
   * @param top output Blob vector (length 1), providing the error gradient with
   *      respect to the outputs
   *   -# @f$ (N \times C \times H \times W) @f$
   *      containing error gradients @f$ \frac{\partial E}{\partial y} @f$
   *      with respect to computed outputs @f$ y @f$
   * @param propagate_down see Layer::Backward.
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the inputs @f$ x @f$; Backward fills their diff with
   *      gradients @f$
   *        \frac{\partial E}{\partial x} =
   *            \mathrm{sign}(x) \frac{\partial E}{\partial y}
   *      @f$ if propagate_down[0]
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
};

}  // namespace caffe

#endif  // CAFFE_ABSVAL_LAYER_HPP_
absval_layer.cpp
#include <vector>

#include "caffe/layers/absval_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void AbsValLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  NeuronLayer<Dtype>::LayerSetUp(bottom, top);
  CHECK_NE(top[0], bottom[0]) << this->type() << " Layer does not "
    "allow in-place computation.";
}

template <typename Dtype>
void AbsValLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const int count = top[0]->count();
  Dtype* top_data = top[0]->mutable_cpu_data();
  caffe_abs(count, bottom[0]->cpu_data(), top_data);
}

template <typename Dtype>
void AbsValLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int count = top[0]->count();
  const Dtype* top_diff = top[0]->cpu_diff();
  if (propagate_down[0]) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_cpu_sign(count, bottom_data, bottom_diff);
    caffe_mul(count, bottom_diff, top_diff, bottom_diff);
  }
}

#ifdef CPU_ONLY
STUB_GPU(AbsValLayer);
#endif

INSTANTIATE_CLASS(AbsValLayer);
REGISTER_LAYER_CLASS(AbsVal);

}  // namespace caffe
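Backward_cpu composes two element-wise helpers from math_functions.hpp: caffe_cpu_sign writes sign(x) into bottom_diff, and caffe_mul then multiplies it by top_diff in place. The standalone sketch below (not the Caffe implementation) spells out the resulting per-element computation, dE/dx = sign(x) * dE/dy:

#include <cstdio>

int main() {
  const float x[3]        = {-1.5f, 0.0f, 2.0f};   // bottom data
  const float top_diff[3] = { 0.3f, 0.3f, 0.3f};   // dE/dy from the layer above
  for (int i = 0; i < 3; ++i) {
    // sign(x) is -1 for x < 0, 0 for x == 0, +1 for x > 0 (as caffe_cpu_sign defines it)
    const float sign = static_cast<float>((x[i] > 0.0f) - (x[i] < 0.0f));
    const float bottom_diff = sign * top_diff[i];  // dE/dx
    std::printf("x=%5.2f  dE/dx=%5.2f\n", x[i], bottom_diff);
  }
  return 0;
}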
Q: I am new to Caffe. Looking at the .prototxt file of the example LeNet model, I have a question: the conv and pool layers do not define a neuron activation function -- or does the model have a fixed activation built in that cannot be changed? By comparison, the Theano code below applies an activation function in its conv+pool layer.

The Caffe prototxt excerpt:

layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    kernel_size: 2
  }
}

The Theano code:

# convolve input feature maps with filters
conv_out = conv2d(
    input=input,
    filters=self.W,
    filter_shape=filter_shape,
    input_shape=image_shape
)

# downsample each feature map individually, using maxpooling
pooled_out = downsample.max_pool_2d(
    input=conv_out,
    ds=poolsize,
    ignore_border=True
)

# add the bias term. Since the bias is a vector (1D array), we first
# reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
# thus be broadcasted across mini-batches and feature map
# width & height
self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
A: There are activation functions -- in Caffe an activation is defined as its own layer in the prototxt (for example a type: "ReLU" layer whose bottom and top name the same blob), placed after the layer it acts on, so the Convolution and Pooling layers themselves do not carry one. In your excerpt, the pooling layer uses max pooling (Caffe's default pool type).