|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
|
# -------------------------------------------------------- # Faster R-CNN # Copyright (c) 2015 Microsoft # Licensed under The MIT License [see LICENSE for details] # Written by Ross Girshick and Sean Bell # --------------------------------------------------------
import sys
sys.path.append('../')
import numpy as np import numpy.random as npr import tensorflow as tf
from Lib.bbox_overlaps import bbox_overlaps from Lib.bbox_transform import bbox_transform from Lib.faster_rcnn_config import cfg from Lib.generate_anchors import generate_anchors
# Computes, for every anchor, its ground-truth targets (foreground/background
# label and box-coordinate offsets) as TensorFlow tensors.
def anchor_target_layer(rpn_cls_score, gt_boxes, im_dims, _feat_stride, anchor_scales):
    """Expose ``_anchor_target_layer_py`` as a TensorFlow graph op.

    The five arguments are forwarded unchanged to ``_anchor_target_layer_py``
    through ``tf.py_func``; see that function for what each one means.

    Returns:
        A 4-tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``.  ``rpn_labels`` is cast to ``tf.int32``;
        the other three stay ``tf.float32``.
    """
    # Run the NumPy implementation inside the graph.  All four outputs come
    # back as float32 because that is the dtype list handed to py_func.
    py_inputs = [rpn_cls_score, gt_boxes, im_dims, _feat_stride, anchor_scales]
    py_output_dtypes = [tf.float32, tf.float32, tf.float32, tf.float32]
    raw_labels, raw_targets, raw_inside, raw_outside = tf.py_func(
        _anchor_target_layer_py, py_inputs, py_output_dtypes)

    # Labels are integral class ids, so cast them before naming the tensor.
    int_labels = tf.cast(raw_labels, tf.int32)
    rpn_labels = tf.convert_to_tensor(int_labels, name='rpn_labels')

    # The regression targets and their weights only need a tensor name.
    rpn_bbox_targets = tf.convert_to_tensor(raw_targets, name='rpn_bbox_targets')
    rpn_bbox_inside_weights = tf.convert_to_tensor(
        raw_inside, name='rpn_bbox_inside_weights')
    rpn_bbox_outside_weights = tf.convert_to_tensor(
        raw_outside, name='rpn_bbox_outside_weights')

    return (rpn_labels, rpn_bbox_targets,
            rpn_bbox_inside_weights, rpn_bbox_outside_weights)
def _anchor_target_layer_py(rpn_cls_score, gt_boxes, im_dims, _feat_stride, anchor_scales):
    """Assign anchors to ground-truth targets (NumPy implementation).

    Produces anchor classification labels and bounding-box regression
    targets for the region proposal network.

    Algorithm:
        1. Generate the base anchors and shift them to every (H, W) cell.
        2. Keep only anchors that lie completely inside the image.
        3. Measure the overlap between those anchors and the GT boxes.
        4. Label anchors as foreground (1), background (0) or ignored (-1).
        5. Randomly subsample foreground and background anchors.
        6. Compute the box regression targets.
        7. Compute the inside/outside loss weights.
        8. Map everything back to the full anchor set and reshape.

    Args:
        rpn_cls_score: RPN classification scores.  Only ``shape`` is read:
            axis 0 must be 1 (single-image batch) and axes 1 and 2 are taken
            as the feature-map height and width.
        gt_boxes: Ground-truth boxes, one row per box.  The rows are passed to
            ``_compute_targets``, which asserts 5 columns and uses the first 4
            as box coordinates.
        im_dims: Image dimensions; ``im_dims[0]`` is read as
            ``[height, width]``.
        _feat_stride: Multiplier converting a feature-map cell index into an
            image-space offset.
        anchor_scales: Scales forwarded to ``generate_anchors``.

    Returns:
        ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)`` as float32 arrays.  With ``A`` anchors per
        cell, ``rpn_labels`` has shape ``(1, 1, A * height, width)`` and the
        other three have shape ``(1, A * 4, height, width)``.

    Raises:
        AssertionError: if the batch size is not 1, or if
            ``cfg.TRAIN.RPN_POSITIVE_WEIGHT`` is non-negative but not strictly
            between 0 and 1.
    """
    im_dims = im_dims[0]  # [height, width] of the input image
    _anchors = generate_anchors(scales=np.array(anchor_scales))  # base anchors, (A, 4)
    _num_anchors = _anchors.shape[0]

    # Allow boxes to sit over the image edge by this many pixels (none).
    _allowed_border = 0

    # Only a minibatch of 1 is supported.
    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # Feature-map size; the total anchor count is height * width * A.
    height, width = rpn_cls_score.shape[1:3]

    # 1. Generate shifted anchors in image coordinates.
    shift_x = np.arange(0, width) * _feat_stride    # (width,)
    shift_y = np.arange(0, height) * _feat_stride   # (height,)
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # both (height, width)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()  # (K, 4)

    # Add A anchors (1, A, 4) to K cell shifts (K, 1, 4) to get shifted
    # anchors (K, A, 4), then flatten to (K * A, 4).
    A = _num_anchors
    K = shifts.shape[0]  # number of feature-map cells
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # 2. Keep only anchors whose four coordinates are inside the image.
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_dims[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_dims[0] + _allowed_border)    # height
    )[0]
    anchors = all_anchors[inds_inside, :]

    # Label convention: 1 is positive, 0 is negative, -1 is "don't care".
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # 3. Overlaps between the inside anchors and the GT boxes,
    #    shape (len(anchors), len(gt_boxes)).
    #    ``np.float`` was removed in NumPy 1.24; ``np.float64`` is the dtype
    #    that alias resolved to, so the arrays handed over are unchanged.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # Per anchor: index of, and overlap with, its best-matching GT box.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # Per GT box: the best overlap any anchor achieves with it ...
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # ... and every anchor that reaches that best overlap (ties included).
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # 4. Assign foreground / background labels from the overlap thresholds.
    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # Assign bg labels first so that positive labels can clobber them.
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each GT box, the anchor(s) with the highest overlap.
    labels[gt_argmax_overlaps] = 1

    # fg label: anchors whose best overlap reaches the positive threshold.
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # Assign bg labels last so that negative labels can clobber positives.
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # 5. Subsample positive labels if we have too many.
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # Subsample negative labels if we have too many; the background budget is
    # whatever remains of the batch after the foreground anchors.
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # 6. Regression targets: the deltas (relative to each anchor) towards the
    #    GT box it overlaps most.
    # TODO: This "weights" business might be deprecated. Requires investigation
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # 7. Inside weights are non-zero only for foreground anchors.
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # Uniform weighting of examples (given non-uniform sampling).
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        # Split the total weight between positives and negatives.
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # 8. Map back to the original set of anchors: out-of-image anchors get
    #    label -1 and zero targets / weights.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels -> (1, 1, A * height, width)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets -> (1, A * 4, height, width)
    rpn_bbox_targets = bbox_targets.reshape(
        (1, height, width, A * 4)).transpose(0, 3, 1, 2)

    # bbox_inside_weights -> (1, A * 4, height, width)
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4)).transpose(0, 3, 1, 2)

    # bbox_outside_weights -> (1, A * 4, height, width)
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4)).transpose(0, 3, 1, 2)

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def _unmap(data, count, inds, fill=0):
    """Scatter a subset of items back into an array covering the full set.

    Args:
        data: Values for the selected items; the first axis indexes items.
        count: Number of items in the full set (length of the result).
        inds: Positions in the full set that ``data`` rows belong to.
        fill: Value written to every position not listed in ``inds``.

    Returns:
        A float32 array whose first axis has length ``count`` and whose
        remaining axes match ``data``.
    """
    is_flat = len(data.shape) == 1
    # A 1-D input maps to a 1-D output; otherwise keep the trailing axes.
    trailing_shape = () if is_flat else data.shape[1:]
    restored = np.full((count, ) + trailing_shape, fill, dtype=np.float32)
    if is_flat:
        restored[inds] = data
    else:
        restored[inds, :] = data
    return restored
def _compute_targets(ex_rois, gt_rois):
    """Compute bounding-box regression targets for an image.

    Args:
        ex_rois: Example boxes (here, the anchors), one 4-column row per box.
        gt_rois: The ground-truth row matched to each example box; must have
            the same number of rows as ``ex_rois`` and 5 columns, of which
            only the first 4 are passed on to ``bbox_transform``.

    Returns:
        The result of ``bbox_transform`` as a float32 array (no copy is made
        when it already is float32).

    Raises:
        AssertionError: if the row counts differ or the column counts are not
            4 and 5 respectively.
    """
    num_examples, example_cols = ex_rois.shape[0], None
    assert num_examples == gt_rois.shape[0]
    example_cols = ex_rois.shape[1]
    assert example_cols == 4
    assert gt_rois.shape[1] == 5

    # Drop the fifth ground-truth column before computing the deltas.
    gt_coords = gt_rois[:, :4]
    deltas = bbox_transform(ex_rois, gt_coords)
    return deltas.astype(np.float32, copy=False)
|