py-faster-rcnn训练参数修改
faster rcnn默认有三种网络模型:ZF(小)、VGG_CNN_M_1024(中)、VGG16(大)。
训练图片大小为500*500,类别数为1。
一. 修改VGG_CNN_M_1024模型配置文件
1)train.prototxt文件
input-data层的num_classes数值由21改为2;
roi-data层的num_classes数值由21改为2;
cls_score层的num_output数值由21改为2(1+1);
bbox_pred层的num_output数值由84改为8(2*4);
2)test.prototxt文件(c++ dll调用的.prototxt也要改)
cls_score层的num_output数值由21改为2(1+1);
bbox_pred层的num_output数值由84改为8(2*4);
3)lib/datasets/pascal_voc.py文件
修改self._classes = ('__background__', '训练的数据类别')
4) 测试模型时需要改的文件faster_rcnn_test.pt
cls_score层的num_output数值由21改为2;
bbox_pred层的num_output数值由84改为8;
二. 解读训练测试配置参数文件config.py
import os
import os.path as osp
import numpy as np
# `pip install easydict` if you don't have it
from easydict import EasyDict as edict
# Root of the configuration tree; every option below is stored as an
# attribute of this EasyDict.
__C = edict()
# Consumers can get config by:
#   from fast_rcnn_config import cfg
# (this is the line other files add to use the config; see train_net.py for
# an example).
cfg = __C
#
# Training options
# 训练的选项
#
__C.TRAIN = edict()

# Scales to use during training (can list multiple scales).
# Each scale is the pixel size of an image's shortest side
# (here the shortest side is scaled to 600).
__C.TRAIN.SCALES = (600,)

# Max pixel size of the longest side of a scaled input image.
__C.TRAIN.MAX_SIZE = 1000

# Images to use per minibatch (one minibatch contains two images).
__C.TRAIN.IMS_PER_BATCH = 2

# Minibatch size (number of regions of interest [ROIs]).
__C.TRAIN.BATCH_SIZE = 128

# Fraction of minibatch that is labeled foreground (i.e. class > 0).
__C.TRAIN.FG_FRACTION = 0.25

# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH).
__C.TRAIN.FG_THRESH = 0.5

# Overlap threshold for a ROI to be considered background (class = 0 if
# overlap in [LO, HI)), i.e. an overlap in [0.1, 0.5) with these defaults.
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.1

# Use horizontally-flipped images during training? (augments the data)
__C.TRAIN.USE_FLIPPED = True

# Train bounding-box regressors.
__C.TRAIN.BBOX_REG = True

# Overlap required between a ROI and ground-truth box in order for that ROI to
# be used as a bounding-box regression training example.
__C.TRAIN.BBOX_THRESH = 0.5

# Iterations between snapshots (a snapshot every 10000 iterations).
__C.TRAIN.SNAPSHOT_ITERS = 10000

# solver.prototxt specifies the snapshot path prefix, this adds an optional
# infix to yield the path: <prefix>[_<infix>]_iters_XYZ.caffemodel
__C.TRAIN.SNAPSHOT_INFIX = ''

# Use a prefetch thread in roi_data_layer.layer.
# So far I haven't found this useful; likely more engineering work is required
# (which is why it defaults to False).
__C.TRAIN.USE_PREFETCH = False

# Normalize the targets (subtract empirical mean, divide by empirical stddev).
__C.TRAIN.BBOX_NORMALIZE_TARGETS = True

# Deprecated (inside weights).
__C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)

# Normalize the targets using "precomputed" (or made up) means and stdevs
# (BBOX_NORMALIZE_TARGETS must also be True).
__C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = False
__C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)
__C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)

# Train using these proposals.
# Annotator's note: this default comes from Fast R-CNN; the RPN-related
# options follow below.
__C.TRAIN.PROPOSAL_METHOD = 'selective_search'

# Make minibatches from images that have similar aspect ratios (i.e. both
# tall and thin or both short and wide) in order to avoid wasting computation
# on zero-padding.
__C.TRAIN.ASPECT_GROUPING = True

# Use RPN to detect objects.
__C.TRAIN.HAS_RPN = False

# IOU >= thresh: positive example (RPN positive-sample threshold).
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7

# IOU < thresh: negative example (RPN negative-sample threshold).
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3

# If an anchor satisfies both the positive and the negative conditions, set it
# to negative (annotator's note: this should not normally be needed).
__C.TRAIN.RPN_CLOBBER_POSITIVES = False

# Max number of foreground examples, expressed as a fraction of the RPN batch.
__C.TRAIN.RPN_FG_FRACTION = 0.5

# Total number of examples (RPN batch size).
__C.TRAIN.RPN_BATCHSIZE = 256

# NMS threshold used on RPN proposals.
__C.TRAIN.RPN_NMS_THRESH = 0.7

# Number of top scoring boxes to keep before applying NMS to RPN proposals.
__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000

# Number of top scoring boxes to keep after applying NMS to RPN proposals.
__C.TRAIN.RPN_POST_NMS_TOP_N = 2000

# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig
# image scale). Annotator's note: smaller proposals would map to less than one
# pixel on conv5.
__C.TRAIN.RPN_MIN_SIZE = 16

# Deprecated (outside weights).
# NOTE(review): the comment says "outside" but the key is *_INSIDE_WEIGHTS;
# confirm which is intended.
__C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)

# Give the positive RPN examples weight of p * 1 / {num positives}
# and give negatives a weight of (1 - p).
# Set to -1.0 to use uniform example weighting (the default here, so positive
# and negative examples are weighted the same).
__C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0
#
# Testing options
# 测试选项 ,类同
#
__C.TEST = edict()

# Scales to use during testing (can list multiple scales).
# Each scale is the pixel size of an image's shortest side.
__C.TEST.SCALES = (600,)

# Max pixel size of the longest side of a scaled input image.
__C.TEST.MAX_SIZE = 1000

# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold).
__C.TEST.NMS = 0.3

# Experimental: treat the (K+1) units in the cls_score layer as linear
# predictors (trained, eg, with one-vs-rest SVMs).
# Annotator's note: SVMs are no longer used for classification, hence False.
__C.TEST.SVM = False

# Test using bounding-box regressors.
__C.TEST.BBOX_REG = True

# Propose boxes with the RPN (disabled by default).
__C.TEST.HAS_RPN = False

# Test using these proposals.
__C.TEST.PROPOSAL_METHOD = 'selective_search'

# NMS threshold used on RPN proposals.
__C.TEST.RPN_NMS_THRESH = 0.7

# Number of top scoring boxes to keep before applying NMS to RPN proposals.
__C.TEST.RPN_PRE_NMS_TOP_N = 6000

# Number of top scoring boxes to keep after applying NMS to RPN proposals.
__C.TEST.RPN_POST_NMS_TOP_N = 300

# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig
# image scale).
__C.TEST.RPN_MIN_SIZE = 16
#
# MISC
#
# Mapping boxes from image coordinates to feature-map coordinates might cause
# some boxes that are distinct in image space to become identical in feature
# coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale
# factor for identifying duplicate boxes.
# 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16.
__C.DEDUP_BOXES = 1.0 / 16.0

# Pixel mean values (BGR order) as a (1, 1, 3) array.
# The same pixel mean is used for all networks even though it's not exactly
# what they were trained with.
__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])

# Seed kept fixed for reproducibility.
__C.RNG_SEED = 3

# A small number that's used many times.
__C.EPS = 1e-14

# Root directory of the project: two levels above this file's directory.
__C.ROOT_DIR = osp.abspath(
    osp.join(osp.dirname(__file__), osp.pardir, osp.pardir))

# Data directory: <ROOT_DIR>/data.
__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data'))

# Model directory: <ROOT_DIR>/models/pascal_voc.
__C.MODELS_DIR = osp.abspath(
    osp.join(__C.ROOT_DIR, 'models', 'pascal_voc'))

# Name of (or path to) the matlab executable.
__C.MATLAB = 'matlab'

# Experiment name; outputs are placed under a directory with this name.
__C.EXP_DIR = 'default'

# Use the GPU implementation of non-maximum suppression.
__C.USE_GPU_NMS = True

# Default GPU device id.
__C.GPU_ID = 0
def get_output_dir(imdb, net=None):
    """Return the directory where experimental artifacts are placed.

    If the directory does not exist, it is created.

    A canonical path is built using the name from an imdb and a network
    (if not None):
    ``<ROOT_DIR>/output/<EXP_DIR>/<imdb.name>[/<net.name>]``.

    Args:
        imdb: object whose ``name`` attribute names the dataset directory.
        net: optional object whose ``name`` attribute is appended as a
            further sub-directory; skipped when ``None``.

    Returns:
        The output directory path (absolute), which exists on return.
    """
    outdir = osp.abspath(
        osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name))
    if net is not None:
        outdir = osp.join(outdir, net.name)
    # Create the directory (and any missing parents) on first use.
    if not osp.exists(outdir):
        os.makedirs(outdir)
    return outdir
def _merge_a_into_b(a, b):
#两个配置⽂件融合
"""Merge config dictionary a into config dictionary b, clobbering the
options in b whenever they are also specified in a.
"""
if type(a) is not edict:
return
for k, v in a.iteritems():
# a must specify keys that are in b
if not b.has_key(k):
rai KeyError('{} is not a valid config key'.format(k))
# the types must match, too
old_type = type(b[k])
if old_type is not type(v):
if isinstance(b[k], np.ndarray):
v = np.array(v, dtype=b[k].dtype)
el:
rai ValueError(('Type mismatch ({} vs. {}) '
'for config key: {}').format(type(b[k]),
type(v), k))
# recursively merge dicts
if type(v) is edict:
try:
_merge_a_into_b(a[k], b[k])
except:
print('Error under config key: {}'.format(k))
rai