# ###########################Decoder############################
f_layer_fc1 = helper_tf_util.conv2d(f_decoder_list[-1], 64, [1, 1], 'fc1', [1, 1], 'VALID', True, is_training)
f_layer_fc2 = helper_tf_util.conv2d(f_layer_fc1, 32, [1, 1], 'fc2', [1, 1], 'VALID', True, is_training)
f_layer_drop = helper_tf_util.dropout(f_layer_fc2, keep_prob=0.5, is_training=is_training, scope='dp1')
f_layer_fc3 = helper_tf_util.conv2d(f_layer_drop, self.config.num_classes, [1, 1], 'fc', [1, 1], 'VALID', False,
                                    is_training, activation_fn=None)
f_out = tf.squeeze(f_layer_fc3, [2])
return f_out
inference is the forward-pass function. From it we can see that RandLA-Net has the following structure (a sketch of the surrounding code follows this list):
1. Lift the input features to 8 dimensions.
2. Encoder: four (dilated_res_block + random_sample) stages, which form a feature pyramid.
3. Run one more MLP on the features at the tip of the pyramid.
4. Decoder: four (nearest_interpolation + conv2d_transpose) stages, which recover point-wise features.
5. The point-wise features go through a few MLPs (the fc1/fc2/dropout/fc layers above) to produce f_out.
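For context, here is a sketch of the part of inference that comes before the fully connected tail shown above. It is paraphrased from RandLANet.py rather than copied verbatim, so the exact argument lists should be checked against the source; the numbered comments correspond to the list above.

d_out = self.config.d_out
feature = inputs['features']
feature = tf.layers.dense(feature, 8, activation=None, name='fc0')            # 1. lift the features to 8 dims
feature = tf.nn.leaky_relu(tf.layers.batch_normalization(feature, -1, 0.99, 1e-6, training=is_training))
feature = tf.expand_dims(feature, axis=2)                                      # [B, N, 1, 8]

# 2. Encoder: dilated_res_block + random_sample, repeated num_layers times
f_encoder_list = []
for i in range(self.config.num_layers):
    f_encoder_i = self.dilated_res_block(feature, inputs['xyz'][i], inputs['neigh_idx'][i],
                                         d_out[i], 'Encoder_layer_' + str(i), is_training)
    f_sampled_i = self.random_sample(f_encoder_i, inputs['sub_idx'][i])         # random down-sampling
    feature = f_sampled_i
    if i == 0:
        f_encoder_list.append(f_encoder_i)
    f_encoder_list.append(f_sampled_i)

# 3. One more MLP on the features at the tip of the pyramid
feature = helper_tf_util.conv2d(f_encoder_list[-1], f_encoder_list[-1].get_shape()[3].value,
                                [1, 1], 'decoder_0', [1, 1], 'VALID', True, is_training)

# 4. Decoder: nearest_interpolation + conv2d_transpose, with skip connections from the encoder
f_decoder_list = []
for j in range(self.config.num_layers):
    f_interp_i = self.nearest_interpolation(feature, inputs['interp_idx'][-j - 1])
    f_decoder_i = helper_tf_util.conv2d_transpose(tf.concat([f_encoder_list[-j - 2], f_interp_i], axis=3),
                                                  f_encoder_list[-j - 2].get_shape()[-1].value, [1, 1],
                                                  'Decoder_layer_' + str(j), [1, 1], 'VALID', bn=True,
                                                  is_training=is_training)
    feature = f_decoder_i
    f_decoder_list.append(f_decoder_i)
# 5. f_decoder_list[-1] then goes through the fc1 / fc2 / dropout / fc layers shown above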
Encoder
dilated_res_block
def dilated_res_block(self, feature, xyz, neigh_idx, d_out, name, is_training):
    f_pc = helper_tf_util.conv2d(feature, d_out // 2, [1, 1], name + 'mlp1', [1, 1], 'VALID', True, is_training)  # reduce dims
    f_pc = self.building_block(xyz, f_pc, neigh_idx, d_out, name + 'LFA', is_training)                            # local feature aggregation
    f_pc = helper_tf_util.conv2d(f_pc, d_out * 2, [1, 1], name + 'mlp2', [1, 1], 'VALID', True, is_training,
                                 activation_fn=None)                                                              # expand dims
    shortcut = helper_tf_util.conv2d(feature, d_out * 2, [1, 1], name + 'shortcut', [1, 1], 'VALID',
                                     activation_fn=None, bn=True, is_training=is_training)                        # shortcut branch
    return tf.nn.leaky_relu(f_pc + shortcut)
dilated_res_block has the following structure:
1. Reduce the feature dimension (to d_out / 2).
2. Use building_block to aggregate the features of the neighbouring points.
3. Raise the feature dimension back up (to 2 * d_out).
4. Compute the shortcut features from the input.
5. Sum the two branches, forming a residual structure (a shape trace follows this list).
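To make the channel bookkeeping concrete, here is a minimal, self-contained shape trace of the residual pattern. The 1x1 convolutions use tf.layers.conv2d as a stand-in for helper_tf_util.conv2d, and d_out = 16 (the first encoder layer in the SemanticKITTI config) is an assumption; only the relative channel sizes matter.

import tensorflow as tf

B, N, d_in, d_out = 4, 1024, 8, 16
feature = tf.zeros([B, N, 1, d_in])                                        # input point features          -> [B, N, 1, 8]
f_pc = tf.layers.conv2d(feature, d_out // 2, [1, 1], name='mlp1')          # 1. reduce to d_out // 2       -> [B, N, 1, 8]
f_pc = tf.zeros([B, N, 1, d_out])                                          # 2. stand-in for the LFA output -> [B, N, 1, 16]
f_pc = tf.layers.conv2d(f_pc, d_out * 2, [1, 1], name='mlp2')              # 3. expand to 2 * d_out        -> [B, N, 1, 32]
shortcut = tf.layers.conv2d(feature, d_out * 2, [1, 1], name='shortcut')   # 4. shortcut branch            -> [B, N, 1, 32]
out = tf.nn.leaky_relu(f_pc + shortcut)                                    # 5. residual sum + activation  -> [B, N, 1, 32]
print(out.shape)                                                           # (4, 1024, 1, 32)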
Then the structure of building_block:
# line 279-291
def building_block(self, xyz, feature, neigh_idx, d_out, name, is_training):
    d_in = feature.get_shape()[-1].value
    f_xyz = self.relative_pos_encoding(xyz, neigh_idx)                                         # relative-position features of the k neighbours
    f_xyz = helper_tf_util.conv2d(f_xyz, d_in, [1, 1], name + 'mlp1', [1, 1], 'VALID', True, is_training)
    f_neighbours = self.gather_neighbour(tf.squeeze(feature, axis=2), neigh_idx)               # the neighbours' own features
    f_concat = tf.concat([f_neighbours, f_xyz], axis=-1)
    f_pc_agg = self.att_pooling(f_concat, d_out // 2, name + 'att_pooling_1', is_training)     # first attentive pooling

    f_xyz = helper_tf_util.conv2d(f_xyz, d_out // 2, [1, 1], name + 'mlp2', [1, 1], 'VALID', True, is_training)
    f_neighbours = self.gather_neighbour(tf.squeeze(f_pc_agg, axis=2), neigh_idx)
    f_concat = tf.concat([f_neighbours, f_xyz], axis=-1)
    f_pc_agg = self.att_pooling(f_concat, d_out, name + 'att_pooling_2', is_training)          # second attentive pooling
    return f_pc_agg
building_block has the following structure:
1. Use relative_pos_encoding to compute the features of the k nearest neighbours relative to the reference point, corresponding to Eq. (1) in the paper (sketched after this list).
2. Lift the dimension of these relative-position features.
3. Concatenate the relative-position features with the neighbours' own features.
4. Use att_pooling to aggregate them into a feature for the reference point, corresponding to Eqs. (2) and (3) in the paper (also sketched after this list).
5. Concatenate the neighbours' updated features with the relative-position features once more.
6. Use att_pooling again to obtain the reference point's feature.
By updating the reference point's feature twice, the information of its k nearest neighbours is aggregated onto the reference point.
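For reference, relative_pos_encoding and att_pooling in RandLANet.py look roughly as follows (lightly reformatted, so check the repo for the exact code). relative_pos_encoding concatenates the neighbour coordinates, the centre coordinates, their difference and the Euclidean distance (Eq. (1)); att_pooling learns per-neighbour attention scores with a shared dense layer, softmax-normalizes them over the k neighbours, and sums (Eqs. (2) and (3)).

def relative_pos_encoding(self, xyz, neigh_idx):
    neighbor_xyz = self.gather_neighbour(xyz, neigh_idx)                                        # [B, N, k, 3] neighbour coordinates
    xyz_tile = tf.tile(tf.expand_dims(xyz, axis=2), [1, 1, tf.shape(neigh_idx)[-1], 1])         # centre point repeated k times
    relative_xyz = xyz_tile - neighbor_xyz                                                       # relative position
    relative_dis = tf.sqrt(tf.reduce_sum(tf.square(relative_xyz), axis=-1, keepdims=True))       # Euclidean distance
    relative_feature = tf.concat([relative_dis, relative_xyz, xyz_tile, neighbor_xyz], axis=-1)
    return relative_feature

def att_pooling(feature_set, d_out, name, is_training):
    batch_size = tf.shape(feature_set)[0]
    num_points = tf.shape(feature_set)[1]
    num_neigh = tf.shape(feature_set)[2]
    d = feature_set.get_shape()[3].value
    f_reshaped = tf.reshape(feature_set, shape=[-1, num_neigh, d])
    att_activation = tf.layers.dense(f_reshaped, d, activation=None, use_bias=False, name=name + 'fc')  # attention scores
    att_scores = tf.nn.softmax(att_activation, axis=1)                                           # normalize over the k neighbours
    f_agg = tf.reduce_sum(f_reshaped * att_scores, axis=1)                                       # weighted sum over neighbours
    f_agg = tf.reshape(f_agg, [batch_size, num_points, 1, d])
    f_agg = helper_tf_util.conv2d(f_agg, d_out, [1, 1], name + 'mlp', [1, 1], 'VALID', True, is_training)
    return f_agg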
random_sample
The points selected by this down-sampling are already fixed when the input batch is built: the point order is shuffled, and then the first fixed fraction of points is taken.
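This selection happens in the data pipeline (the tf_map function of main_SemanticKITTI.py), roughly as follows; the snippet is paraphrased from the repo, so variable names should be checked against the source. Because the points in batch_pc are already shuffled, slicing off the first N / sub_sampling_ratio rows is an (already fixed) random sample.

for i in range(cfg.num_layers):
    neighbour_idx = tf.py_func(DP.knn_search, [batch_pc, batch_pc, cfg.k_n], tf.int32)      # k-NN indices of every point
    sub_points = batch_pc[:, :tf.shape(batch_pc)[1] // cfg.sub_sampling_ratio[i], :]        # keep the first 1/ratio of the shuffled points
    pool_i = neighbour_idx[:, :tf.shape(batch_pc)[1] // cfg.sub_sampling_ratio[i], :]       # their neighbour indices, consumed by random_sample
    up_i = tf.py_func(DP.knn_search, [sub_points, batch_pc, 1], tf.int32)                   # 1-NN, consumed later by nearest_interpolation
    input_points.append(batch_pc)
    input_neighbors.append(neighbour_idx)
    input_pools.append(pool_i)
    input_up_samples.append(up_i)
    batch_pc = sub_points                                                                    # the sub-sampled cloud feeds the next layer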
Decoder
The main thing to look at is how nearest_interpolation is done:
def nearest_interpolation(feature, interp_idx):
    """
    :param feature: [B, N, d] input features matrix
    :param interp_idx: [B, up_num_points, 1] nearest neighbour index
    :return: [B, up_num_points, d] interpolated features matrix
    """
    feature = tf.squeeze(feature, axis=2)
    batch_size = tf.shape(interp_idx)[0]
    up_num_points = tf.shape(interp_idx)[1]
    interp_idx = tf.reshape(interp_idx, [batch_size, up_num_points])
    interpolated_features = tf.batch_gather(feature, interp_idx)
    interpolated_features = tf.expand_dims(interpolated_features, axis=2)
    return interpolated_features
We can see that feature is up-sampled according to interp_idx.
# main_SemanticKITTI.py
# line 145
up_i = tf.py_func(DP.knn_search, [sub_points, batch_pc, 1], tf.int32)
We can see that up_i (which becomes interp_idx) is obtained by searching for the single nearest neighbour.
In other words, nearest_interpolation copies the feature of the nearest retained point to each new point. This differs somewhat from the interpolation in PointNet++, which blends the three nearest neighbours with inverse-distance weights.
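A tiny toy check of this gather-based up-sampling (assuming TF 1.x, where tf.batch_gather is available; the numbers are made up):

import tensorflow as tf

feature = tf.constant([[[1., 1.], [2., 2.], [3., 3.]]])   # [B=1, N=3, d=2] sub-sampled features
interp_idx = tf.constant([[0, 0, 1, 2, 2]])                # [B=1, up_num_points=5] nearest sub-point per dense point
up = tf.batch_gather(feature, interp_idx)                  # [1, 5, 2]: each dense point copies its nearest sub-point's feature
with tf.Session() as sess:
    print(sess.run(up))  # [[[1. 1.] [1. 1.] [2. 2.] [3. 3.] [3. 3.]]]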
Loss
# RandLANet.py
# line 57-76
with tf.variable_scope('loss'):
    self.logits = tf.reshape(self.logits, [-1, config.num_classes])
    self.labels = tf.reshape(self.labels, [-1])

    # Boolean mask of points that should be ignored
    ignored_bool = tf.zeros_like(self.labels, dtype=tf.bool)
    for ign_label in self.config.ignored_label_inds:
        ignored_bool = tf.logical_or(ignored_bool, tf.equal(self.labels, ign_label))

    # Collect logits and labels that are not ignored
    valid_idx = tf.squeeze(tf.where(tf.logical_not(ignored_bool)))
    valid_logits = tf.gather(self.logits, valid_idx, axis=0)
    valid_labels_init = tf.gather(self.labels, valid_idx, axis=0)

    # Reduce label values in the range of logit shape
    reducing_list = tf.range(self.config.num_classes, dtype=tf.int32)
    inserted_value = tf.zeros((1,), dtype=tf.int32)
    for ign_label in self.config.ignored_label_inds:
        reducing_list = tf.concat([reducing_list[:ign_label], inserted_value, reducing_list[ign_label:]], 0)
    valid_labels = tf.gather(reducing_list, valid_labels_init)
The loss computation has the following structure:
1. Flatten the logits from the forward pass and the labels.
2. Remove the points whose label is ignored.
3. Remap the labels: since some labels are ignored, their indices are removed and the indices of the labels after them shift forward (for example, if label 0 is ignored, a raw label k >= 1 becomes k - 1).
4. Load the per-class weights and compute the loss with a weighted cross-entropy (a sketch of this step follows).
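The weight loading and cross-entropy themselves live in get_loss, which is called right after the snippet above (as self.loss = self.get_loss(valid_logits, valid_labels, self.class_weights)). It looks roughly like this; treat it as a sketch and check RandLANet.py for the exact code:

def get_loss(self, logits, labels, pre_cal_weights):
    # weight each point by the pre-computed (inverse-frequency) weight of its class
    class_weights = tf.convert_to_tensor(pre_cal_weights, dtype=tf.float32)
    one_hot_labels = tf.one_hot(labels, depth=self.config.num_classes)
    weights = tf.reduce_sum(class_weights * one_hot_labels, axis=1)
    unweighted_losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=one_hot_labels)
    weighted_losses = unweighted_losses * weights
    output_loss = tf.reduce_mean(weighted_losses)
    return output_loss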