[Object Detection Series] YOLOv3 Loss Function and Bounding Box Regression: Annotated PyTorch Source Code and Theory
1. Code download:
2. Bounding box regression and the loss function. The relevant source code lives in utils.py.
Bounding box regression, put plainly, means finding translation and scaling coefficients that bring a predicted box as close as possible to the ground truth; the coefficients that achieve this are the regression coefficients. "As close as possible" just means making the two boxes as similar as we can, and similarity is quantified by constructing a loss function: the more alike the boxes, the smaller its value. Shrinking the loss step by step then yields suitable translation and scaling coefficients; that shrinking process is the optimization, i.e. the usual neural network routine.
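To make the transform concrete: in YOLOv3 the raw network outputs tx, ty, tw, th are exactly these translation and scaling coefficients. The center is a sigmoid-squashed offset from the grid-cell corner (cx, cy), and the width/height exponentially scale the anchor (pw, ph). A minimal sketch with made-up values:

import torch

# tx, ty, tw, th: raw outputs for one box; cx, cy: grid-cell corner;
# pw, ph: anchor width/height on this feature map (all values illustrative)
tx, ty, tw, th = torch.tensor([0.2, -0.1, 0.3, 0.5])
cx, cy, pw, ph = 5.0, 7.0, 3.0, 4.0

bx = torch.sigmoid(tx) + cx  # predicted center x, stays inside the cell
by = torch.sigmoid(ty) + cy  # predicted center y
bw = pw * torch.exp(tw)      # predicted width = scaled anchor width
bh = ph * torch.exp(th)      # predicted height = scaled anchor height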
The core code is the two functions below.
compute_loss builds the loss. The box-regression term is essentially: loss = (regression coefficients applied to the anchors) - (ground truth of the positive samples); training exists to find the coefficients that make each positive sample's anchor approach its ground-truth box. Also note, and remember: the confidence the program finally outputs is the objectness confidence multiplied by the class confidence.
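As a made-up illustration of that final score (assuming the same per-box layout as the code below: 4 box values, 1 objectness value, 4 class scores):

import torch

pred = torch.randn(100, 9)              # 100 boxes: [x, y, w, h, obj, 4 class scores]
obj_conf = torch.sigmoid(pred[:, 4:5])  # objectness confidence
cls_conf = torch.sigmoid(pred[:, 5:])   # per-class confidence
scores = obj_conf * cls_conf            # final score = objectness * class confidence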
def compute_loss(p, targets, model):  # predictions, targets, model
    ft = torch.cuda.FloatTensor if p[0].is_cuda else torch.Tensor
    lcls, lbox, lobj = ft([0]), ft([0]), ft([0])
    # Select the positive samples, match each to an anchor, and map the ground-truth
    # box information onto each level of the feature pyramid.
    tcls, tbox, indices, anchor_vec = build_targets(p, targets, model)
    print("tcls = ", tcls)
    # print("tbox = ", tbox)
    # print("indices = ", indices)
    # print("anchor_vec = ", anchor_vec)
    h = model.hyp  # hyperparameters
    red = 'mean'  # Loss reduction (sum or mean)

    # Define criteria. pos_weight weights the positive samples; reduction controls the
    # output mode: 'sum' sums the per-sample losses, 'mean' averages them, and 'none'
    # returns a per-sample loss with the same shape as the input.
    BCEcls = nn.BCEWithLogitsLoss(pos_weight=ft([h['cls_pw']]), reduction=red)
    BCEobj = nn.BCEWithLogitsLoss(pos_weight=ft([h['obj_pw']]), reduction=red)

    # class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
    cp, cn = smooth_BCE(eps=0.0)  # cp = 1, cn = 0: smoothing effectively disabled

    # focal loss
    g = h['fl_gamma']  # focal loss gamma
    if g > 0:  # focal loss is unused here (fl_gamma = 0)
        BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)

    # Compute losses
    np, ng = 0, 0  # number grid points, targets
    for i, pi in enumerate(p):  # layer index, layer predictions; pi is the feature map of layer i
        b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx of layer i's positive samples
        tobj = torch.zeros_like(pi[..., 0])  # target obj
        np += tobj.numel()

        # Compute losses
        nb = len(b)  # nb = number of positive samples on layer i
        if nb:  # number of targets
            ng += nb  # ng = total number of positive samples
            # Index with b, a, gj, gi to pull the positive-sample predictions out of pi
            ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets, shape (nb, 9)

            # ps[:, 2:4] = torch.sigmoid(ps[:, 2:4])  # wh power loss (uncomment)

            # GIoU. The center offsets tx, ty are squashed into [0, 1] with a sigmoid, which keeps
            # the predicted center inside the grid cell making the prediction and prevents it from
            # drifting too far. Remember: the network predicts offsets relative to the top-left
            # corner of the responsible grid cell, not absolute coordinates.
            pxy = torch.sigmoid(ps[:, 0:2])  # pxy = pxy * s - (s - 1) / 2, s = 1.5 (scale_xy)
            # Compute w, h of the positive-sample boxes
            pwh = torch.exp(ps[:, 2:4]).clamp(max=1E3) * anchor_vec[i]
            # Assemble the complete box: center coordinates plus w, h.
            # pbox is the prediction and the anchor is the starting value; the transform between
            # them is the set of regression coefficients training must recover. Driving pbox toward
            # the ground truth is the core of box regression.
            pbox = torch.cat((pxy, pwh), 1)  # predicted box
            # Constructing the loss below fixes the correspondence that optimization follows
            # to pull the prediction toward the ground truth.
            giou = bbox_iou(pbox.t(), tbox[i], x1y1x2y2=False, GIoU=True)  # giou computation
            # the box loss is 1 - giou
            lbox += (1.0 - giou).sum() if red == 'sum' else (1.0 - giou).mean()  # giou loss
            # Initialize tobj for the positive samples; the initial value blends in the giou
            tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * giou.detach().clamp(0).type(tobj.dtype)  # giou ratio

            if model.nc > 1:  # cls loss (only if multiple classes)
                t = torch.full_like(ps[:, 5:], cn)  # targets
                t[range(nb), tcls[i]] = cp
                lcls += BCEcls(ps[:, 5:], t)  # BCE: the classification loss
                # lcls += CE(ps[:, 5:], tcls[i])  # CE

            # Append targets to text file
            # with open('', 'a') as file:
            #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]

        # Cross entropy between the positive targets and the feature-map predictions: the objectness (confidence) loss
        lobj += BCEobj(pi[..., 4], tobj)  # obj loss
        # print("lcls = ", lcls)

    lbox *= h['giou']
    lobj *= h['obj']
    lcls *= h['cls']
    if red == 'sum':
        bs = tobj.shape[0]  # batch size
        lobj *= 3 / (6300 * bs) * 2  # 3 / np * 2
        if ng:
            lcls *= 3 / ng / model.nc
            lbox *= 3 / ng

    loss = lbox + lobj + lcls
    return loss, torch.cat((lbox, lobj, lcls, loss)).detach()
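compute_loss calls two helpers defined elsewhere in utils.py, smooth_BCE and FocalLoss, which are not shown here. Below is a minimal sketch of smooth_BCE consistent with the cp = 1, cn = 0 comment above, followed by a hypothetical training-step call of compute_loss (the model, dataloader and optimizer names are placeholders):

def smooth_BCE(eps=0.1):
    # positive and negative BCE targets under label smoothing;
    # eps=0.0 degenerates to cp=1.0, cn=0.0, i.e. no smoothing
    return 1.0 - 0.5 * eps, 0.5 * eps

# Hypothetical training step:
# for imgs, targets in dataloader:
#     preds = model(imgs)  # list of per-layer feature maps p
#     loss, loss_items = compute_loss(preds, targets, model)
#     loss.backward()
#     optimizer.step()
#     optimizer.zero_grad()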
# At each yolo layer, match the preset anchors against the ground truth to obtain the positive samples.
# Rules:
# 1. If a predicted box has max IoU < ignore_thresh against every ground truth, that box is a
#    negative sample.
# 2. If a ground-truth center falls inside a grid cell, that cell is responsible for detecting the
#    object, and the predicted box with the highest IoU against the object becomes the positive
#    sample (note that ignore_thresh plays no role here: the box stays positive even if that max
#    IoU < ignore_thresh).
def build_targets(p, targets, model):
    # targets = [image, class, x, y, w, h]: image is the image's index within the batch,
    # class is the category, and x y w h is the box
    nt = targets.shape[0]
    print("targets = ", targets)
    tcls, tbox, indices, av = [], [], [], []
    reject, use_all_anchors = True, True
    gain = torch.ones(6, device=targets.device)  # normalized to gridspace gain

    # m = list(model.modules())[-1]
    # for i in range(m.nl):
    #     anchors = m.anchors[i]
    multi_gpu = type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
    for i, j in enumerate(model.yolo_layers):
        # get number of grid points and anchor vec for this yolo layer
        # yolov3.cfg contains three yolo layers; this fetches the grid size and anchor sizes of the
        # corresponding layer. i runs from 0 to 2, from the coarsest scale to the finest: after the
        # cfg is read, layer 0 divides by 32, layer 1 by 16 and layer 2 by 8, mapping the anchors
        # (given in original-image coordinates) onto the current feature map.
        anchors = model.module.module_list[j].anchor_vec if multi_gpu else model.module_list[j].anchor_vec

        # iou of targets-anchors
        # p[i] is one layer's prediction: layer 0 is 12 * 12 * 9, layer 1 is 24 * 24 * 9 and
        # layer 2 is 48 * 48 * 9, where 9 = 4 classes + 4 box coordinates + 1 confidence
        gain[2:] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # whwh gain: feature-map w h w h into gain[2:6]
        # t holds the ground-truth boxes mapped onto this feature map: center plus width/height,
        # in feature-map coordinates
        t, a = targets * gain, []
        # gwh holds the ground-truth widths/heights gw, gh on this feature map
        gwh = t[:, 4:6]
        if nt:
            # anchor_vec: shape = [3, 2], the 3 anchors
            # gwh: shape = [4, 2], e.g. 4 ground truths
            # iou: shape = [3, 4], the iou of each anchor against each ground truth
            # ordinary wh-based iou computation
            iou = wh_iou(anchors, gwh)  # iou(3,n) = wh_iou(anchors(3,2), gwh(n,2))

            if use_all_anchors:
                na = anchors.shape[0]  # na = number of anchors
                # anchor index assigned to each ground truth
                a = torch.arange(na).view(-1, 1).repeat(1, nt).view(-1)
                # per-anchor copy of each ground truth's class and box info
                # (in this layer's feature-map coordinates): [image, class, x, y, w, h]
                t = t.repeat(na, 1)
            else:  # use best anchor only
                # keep only the highest-iou anchor for each ground truth
                iou, a = iou.max(0)  # best iou and anchor

            # reject anchors below iou_thres (OPTIONAL, increases P, lowers R)
            if reject:
                # j stores [True or False] flags, one per anchor-truth pair
                j = iou.view(-1) > model.hyp['iou_t']  # iou threshold hyperparameter
                # filter out pairs below the threshold; t keeps the surviving ground-truth box,
                # image and class info, a the surviving anchor indices
                t, a = t[j], a[j]
        # The ground truths still matched to anchors after threshold filtering are the positive samples

        # Indices: b is the image index, c the ground-truth class index
        b, c = t[:, :2].long().t()  # target image, class
        # ground-truth box on this feature map
        gxy = t[:, 2:4]  # grid x, y: box center on the feature map
        gwh = t[:, 4:6]  # grid w, h: box width/height on the feature map
        # grid indices; long() truncates to integers, giving the cell's top-left corner
        gi, gj = gxy.long().t()  # grid x, y indices
        # the indices tuple stores:
        '''
        b: index within the batch
        a: anchor index of the selected positive sample
        gj, gi: top-left coordinates of the selected grid cell
        '''
        indices.append((b, a, gj, gi))

        # Box
        gxy -= gxy.floor()  # xy: subtract the floor, leaving the center's fractional offset inside its cell
        tbox.append(torch.cat((gxy, gwh), 1))  # xywh (grids): fractional center offset plus box width/height
        av.append(anchors[a])  # anchor vec

        # Class
        tcls.append(c)
        if c.shape[0]:  # if any targets
            assert c.max() < model.nc, 'Model accepts %g classes labeled from 0-%g, however you labelled a class %g. ' \
                                       'See https://github.com/ultralytics/yolov3/wiki/Train-Custom-Data' % (
                                           model.nc, model.nc - 1, c.max())

    # tcls: the positive samples' class indices across the three yolo layers. tbox: each positive
    # sample's fractional box center relative to its own grid cell plus its box size on the matching
    # feature map. indices: as described above. av: the anchor width/height of each positive sample
    # per layer (the original anchors divided by 32, 16, 8).
    return tcls, tbox, indices, av
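build_targets also relies on wh_iou, which is not shown above. A sketch consistent with how it is used here (both boxes are treated as sharing a center, so only widths and heights matter):

import torch

def wh_iou(wh1, wh2):
    # IoU between anchor shapes (na, 2) and target shapes (nt, 2), ignoring centers; returns (na, nt)
    wh1 = wh1[:, None]  # (na, 1, 2)
    wh2 = wh2[None]     # (1, nt, 2)
    inter = torch.min(wh1, wh2).prod(2)  # overlap area if the boxes shared a center
    return inter / (wh1.prod(2) + wh2.prod(2) - inter)  # iou = intersection / union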
3. Full code with annotations:
import glob
import math
import os
import random
import shutil
import subprocess
from pathlib import Path
from sys import platform
import cv2
#import matplotlib
#import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.u("Agg")
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torchvision
from tqdm import tqdm
from . import torch_utils # , google_utils
# Set printoptions
torch.set_printoptions(linewidth=320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5
mpl.rc('font', **{'size': 11})
# Prevent OpenCV from multithreading (to use PyTorch DataLoader)
cv2.setNumThreads(0)
def init_seeds(seed=0):
    random.seed(seed)
    np.random.seed(seed)
    torch_utils.init_seeds(seed=seed)
def check_git_status():
    if platform in ['linux', 'darwin']:
        # Suggest 'git pull' if repo is out of date
        s = subprocess.check_output('if [ -d .git ]; then git fetch && git status -uno; fi', shell=True).decode('utf-8')
        if 'Your branch is behind' in s:
            print(s[s.find('Your branch is behind'):s.find('\n\n')] + '\n')
def load_classes(path):
    # Loads *.names file at 'path'
    with open(path, 'r') as f:
        names = f.read().split('\n')
    return list(filter(None, names))  # filter removes empty strings (such as last line)
def labels_to_class_weights(labels, nc=80):
    # Get class weights (inverse frequency) from training labels
    if labels[0] is None:  # no labels loaded
        return torch.Tensor()

    labels = np.concatenate(labels, 0)  # labels.shape = (866643, 5) for COCO
    classes = labels[:, 0].astype(np.int)  # labels = [class xywh]
    weights = np.bincount(classes, minlength=nc)  # occurrences per class, e.g. [100 100 20 1] for four classes (221 samples total)

    # Prepend gridpoint count (for uCE training)
    # gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum()  # gridpoints per image
    # weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5  # prepend gridpoints to start

    weights[weights == 0] = 1  # replace empty bins with 1
    weights = 1 / weights  # number of targets per class
    weights /= weights.sum()  # normalize so the weights sum to 1, i.e. each class's share of the dataset
    return torch.from_numpy(weights)
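A quick usage sketch (the label arrays are made up; the imports at the top of this file are assumed): each image contributes an (n, 5) array of [class, x, y, w, h] rows, and the rarer a class, the larger its weight:

labels = [np.array([[0, 0.5, 0.5, 0.2, 0.2], [1, 0.3, 0.3, 0.1, 0.1]]),
          np.array([[0, 0.7, 0.7, 0.4, 0.4]]),
          np.array([[2, 0.2, 0.8, 0.3, 0.3]])]
w = labels_to_class_weights(labels, nc=3)
print(w)  # tensor([0.2000, 0.4000, 0.4000]): class 0 appears twice, so its weight is halved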
def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
    # Produces image weights based on class mAPs
    n = len(labels)
    class_counts = np.array([np.bincount(labels[i][:, 0].astype(np.int), minlength=nc) for i in range(n)])
    image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1)
    # index = random.choices(range(n), weights=image_weights, k=1)  # weight image sample
    return image_weights
def coco_class_weights():  # frequency of each class in coco train2014
    n = [187437, 4955, 30920, 6033, 3838, 4332, 3160, 7051, 7677, 9167, 1316, 1372, 833, 6757, 7355, 3302, 3776, 4671,
         6769, 5706, 3908, 903, 3686, 3596, 6200, 7920, 8779, 4505, 4272, 1862, 4698, 1962, 4403, 6659, 2402, 2689,
         4012, 4175, 3411, 17048, 5637, 14553, 3923, 5539, 4289, 10084, 7018, 4314, 3099, 4638, 4939, 5543, 2038, 4004,
         5053, 4578, 27292, 4113, 5931, 2905, 11174, 2873, 4036, 3415, 1517, 4122, 1980, 4464, 1190, 2302, 156, 3933,
         1877, 17630, 4337, 4624, 1075, 3468, 135, 1380]
    weights = 1 / torch.Tensor(n)
    weights /= weights.sum()
    # with open('data/coco.names', 'r') as f:
    #     for k, v in zip(f.read().splitlines(), n):
    #         print('%20s: %g' % (k, v))
    return weights
def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)
    # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
    # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
    # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
    # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)]  # darknet to coco
    # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)]  # coco to darknet
    x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
         35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
         64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
    return x
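For example, converting a few 80-index predictions to the 91-index ids used for COCO evaluation (a made-up snippet):

coco91 = coco80_to_coco91_class()
pred_classes = [0, 2, 62]                      # person, car, tv in 80-index space
paper_ids = [coco91[c] for c in pred_classes]  # [1, 3, 72] in 91-index space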
def xyxy2xywh(x):
    # Transform box coordinates from [x1, y1, x2, y2] (where xy1=top-left, xy2=bottom-right) to [x, y, w, h]
    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
    y[:, 2] = x[:, 2] - x[:, 0]  # width
    y[:, 3] = x[:, 3] - x[:, 1]  # height
    return y
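A quick check of the conversion on a single made-up box:

box = torch.tensor([[10., 20., 50., 80.]])  # x1, y1, x2, y2
print(xyxy2xywh(box))                        # tensor([[30., 50., 40., 60.]]): center (30, 50), size 40 x 60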
def xywh2xyxy(x):
# Transform box coordinates from [x, y, w, h] to [x1, y1, x2, y2] (where xy1=top-left, xy2=bottom-right)