
更新时间:2023-05-12 02:19:59 阅读: 评论:0

two-stage:典型代表为Faster-rcnn。其结构分为RPN(Region Proposal Network)和RCNN(Region Convolution Neural Network)两部分。
// Forward pass of ROI max pooling.
//
// Each loop iteration produces one output element. `index` is decomposed as
//   index = ((n * channels + c) * pooled_height + ph) * pooled_width + pw
// where n is the ROI number, c the channel and (ph, pw) the output bin.
//
// Parameters
//   nthreads       number of output elements to compute (upper bound of the loop)
//   bottom_data    input feature maps, read as
//                  ((batch * channels + c) * height + h) * width + w
//   spatial_scale  factor that maps ROI coordinates onto the feature map
//   height, width  spatial size of one feature-map plane
//   channels       number of feature-map channels
//   pooled_height, pooled_width
//                  output size per ROI (2 x 2 in the article's worked example)
//   bottom_rois    5 floats per ROI: [batch_index, x1, y1, x2, y2]
//   top_data       output, one float per `index`
//   argmax_data    optional (may be NULL); receives the flat bottom_data index
//                  of the selected maximum, or -1 if nothing was pooled
//
// NOTE(review): CUDA_KERNEL_LOOP is defined outside this snippet; it is
// presumably the usual Caffe grid-stride loop macro -- confirm.
__global__ void ROIPoolForward(const int nthreads, const float* bottom_data,
                               const float spatial_scale, const int height, const int width,
                               const int channels, const int pooled_height, const int pooled_width,
                               const float* bottom_rois, float* top_data, int* argmax_data)
{
    CUDA_KERNEL_LOOP(index, nthreads)
    {
        // (n, c, ph, pw) is an element in the pooled output.
        const int pw = index % pooled_width;
        const int ph = (index / pooled_width) % pooled_height;
        const int c  = (index / pooled_width / pooled_height) % channels;
        const int n  = index / pooled_width / pooled_height / channels;

        // ROI record: [batch_index, x1, y1, x2, y2]. The corners are scaled to
        // feature-map coordinates and rounded to whole pixels.
        // (Article's worked example: start_w = 0, start_h = 3, end_w = 7, end_h = 8.)
        const float* roi = bottom_rois + n * 5;
        const int roi_batch_ind = (int)roi[0];
        const int roi_start_w = (int)roundf(roi[1] * spatial_scale);
        const int roi_start_h = (int)roundf(roi[2] * spatial_scale);
        const int roi_end_w   = (int)roundf(roi[3] * spatial_scale);
        const int roi_end_h   = (int)roundf(roi[4] * spatial_scale);

        // Force malformed ROIs to be 1x1.
        const int roi_width  = max(roi_end_w - roi_start_w + 1, 1);
        const int roi_height = max(roi_end_h - roi_start_h + 1, 1);
        const float bin_size_h = (float)roi_height / (float)pooled_height;
        const float bin_size_w = (float)roi_width / (float)pooled_width;

        // Bin extent relative to the ROI origin. Because of floor/ceil the bins
        // may overlap and may differ in size from one another.
        int hstart = (int)floorf((float)ph * bin_size_h);
        int wstart = (int)floorf((float)pw * bin_size_w);
        int hend   = (int)ceilf((float)(ph + 1) * bin_size_h);
        int wend   = (int)ceilf((float)(pw + 1) * bin_size_w);

        // Add ROI offsets and clip to input boundaries.
        hstart = min(max(hstart + roi_start_h, 0), height);
        hend   = min(max(hend + roi_start_h, 0), height);
        wstart = min(max(wstart + roi_start_w, 0), width);
        wend   = min(max(wend + roi_start_w, 0), width);

        // Before clipping, floor/ceil guarantee hend > hstart and wend > wstart,
        // so a bin can only become empty when clipping collapses it, i.e. when
        // it lies entirely outside the feature map.
        const bool is_empty = (hend <= hstart) || (wend <= wstart);

        // Define an empty pooling region to be zero.
        float maxval = is_empty ? 0.0f : -FLT_MAX;
        // If nothing is pooled, argmax = -1 causes nothing to be backprop'd.
        int maxidx = -1;

        const int bottom_data_batch_offset = roi_batch_ind * channels * height * width;
        const int bottom_data_offset = bottom_data_batch_offset + c * height * width;
        for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
                const int bottom_index = bottom_data_offset + h * width + w;
                const float val = bottom_data[bottom_index];
                if (val > maxval) {
                    maxval = val;
                    maxidx = bottom_index;
                }
            }
        }

        top_data[index] = maxval;
        if (argmax_data != NULL)
            argmax_data[index] = maxidx;
    }
}
从上面ROI-pooling的实现过程不难看出,由于取整的影响,各个index方块中对应的宽高是不同的,有些是2,有些是3。而ROI-align做了一个小改动,使h, w可以是小数,并通过双线性内插取得各个像素值,消除了取整带来的误差。
// Forward pass of ROI Align: samples the feature map at fractional positions
// using bilinear interpolation (no pooling is performed here).
//
// Each loop iteration produces one output element. `index` is decomposed as
//   index = ((n * channels + c) * aligned_height + ph) * aligned_width + pw
// where n is the ROI number, c the channel and (ph, pw) the sample position.
//
// Parameters
//   nthreads       number of output elements to compute (upper bound of the loop)
//   bottom_data    input feature maps, read as
//                  ((batch * channels + c) * height + h) * width + w
//   spatial_scale  factor that maps ROI coordinates onto the feature map
//   height, width  spatial size of one feature-map plane
//   channels       number of feature-map channels
//   aligned_height, aligned_width
//                  number of sample points per ROI along each axis
//   bottom_rois    5 floats per ROI: [batch_index, x1, y1, x2, y2]
//   top_data       output, one float per `index`
//
// NOTE(review): CUDA_1D_KERNEL_LOOP is defined outside this snippet; it is
// presumably a grid-stride loop macro -- confirm.
__global__ void ROIAlignForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width,
                                const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) {
    CUDA_1D_KERNEL_LOOP(index, nthreads) {
        // (n, c, ph, pw) is an element in the aligned output.
        const int pw = index % aligned_width;
        const int ph = (index / aligned_width) % aligned_height;
        const int c  = (index / aligned_width / aligned_height) % channels;
        const int n  = index / aligned_width / aligned_height / channels;

        // ROI record: [batch_index, x1, y1, x2, y2]. Unlike ROI pooling, the
        // scaled corners are kept fractional (no rounding).
        const float* roi = bottom_rois + n * 5;
        // The batch index is kept as an int so that the plane offset below is
        // computed with integer arithmetic rather than in float.
        const int roi_batch_ind = (int)roi[0];
        const float roi_start_w = roi[1] * spatial_scale;
        const float roi_start_h = roi[2] * spatial_scale;
        const float roi_end_w   = roi[3] * spatial_scale;
        const float roi_end_h   = roi[4] * spatial_scale;

        // Clamp malformed (negative) ROI extents to zero.
        const float roi_width  = fmaxf(roi_end_w - roi_start_w + 1.0f, 0.0f);
        const float roi_height = fmaxf(roi_end_h - roi_start_h + 1.0f, 0.0f);

        // The sample points are spread so that the first lies on the ROI start
        // and the last on start + extent. With a single sample point the
        // spacing is undefined (division by zero); use 0 so the sample lies on
        // the ROI start instead of producing NaN.
        const float bin_size_h = aligned_height > 1 ? roi_height / (float)(aligned_height - 1) : 0.0f;
        const float bin_size_w = aligned_width > 1 ? roi_width / (float)(aligned_width - 1) : 0.0f;

        // Fractional sample position on the feature map.
        const float h = (float)ph * bin_size_h + roi_start_h;
        const float w = (float)pw * bin_size_w + roi_start_w;

        if (h < 0 || h >= height || w < 0 || w >= width) {
            // Samples outside the feature map contribute zero.
            top_data[index] = 0.0f;
        } else {
            // Top-left neighbour, limited to height - 2 / width - 2 so that the
            // bottom/right neighbour stays inside the plane. For samples in the
            // last row/column band the ratio therefore exceeds 1 (linear
            // extrapolation); this matches the original behaviour.
            // The lower clamp to 0 and the upper clamp on the second neighbour
            // only matter when height == 1 or width == 1, where the unclamped
            // indices would fall outside the plane.
            const int h_lo = max((int)fminf(floorf(h), (float)(height - 2)), 0);
            const int w_lo = max((int)fminf(floorf(w), (float)(width - 2)), 0);
            const int h_hi = min(h_lo + 1, height - 1);
            const int w_hi = min(w_lo + 1, width - 1);

            const float h_ratio = h - (float)h_lo;
            const float w_ratio = w - (float)w_lo;

            // Start of the (batch, channel) plane inside bottom_data.
            const float* plane = bottom_data + (roi_batch_ind * channels + c) * height * width;
            const float upleft    = plane[h_lo * width + w_lo];
            const float upright   = plane[h_lo * width + w_hi];
            const float downleft  = plane[h_hi * width + w_lo];
            const float downright = plane[h_hi * width + w_hi];

            // Bilinear interpolation.
            top_data[index] = upleft * (1.0f - h_ratio) * (1.0f - w_ratio)
                            + upright * (1.0f - h_ratio) * w_ratio
                            + downleft * h_ratio * (1.0f - w_ratio)
                            + downright * h_ratio * w_ratio;
        }
    }
}
有些细心的同学可能发现了上面的代码没有进行pooling操作,只有内插。那是因为caffe实现roi-align时,在后面又接了一个avg_pooling和max_pooling,以实现不同的roipooling操作,并且避免了重复开发。
Deformable Roi pooling
可变形roi提取方法来源于论文《Deformable Convolutional Networks》,文章介绍了形变卷积的方法(增加offset)和所带来的好处。其中,Deformable Roi pooling就是一种由此衍生而来的思路。
template <typename DType>

本文发布于:2023-05-12 02:19:59,感谢您对本站的认可!



标签:代码   位置   取整   带来   提取   特征   参数   理解
留言与评论(共有 0 条评论)
Copyright ©2019-2022 Comsenz Inc.Powered by © 专利检索| 网站地图