Here it comes: an open-source TensorFlow version of YoloV5
Open source
Since yolov5 was open-sourced, it has drawn plenty of attention (for whatever reason). I recently implemented its main parts in tensorflow, possibly the first pure tensorflow2 version. Welcome to try and star:
I came across yolov3 at work before (running the demo should count as coming across it), and the results were stunning. I'm just a newcomer to computer vision (sadly, as a middle-aged man I'm somehow a damn newcomer everywhere), so my ability is limited and omissions are inevitable. Implementing it from scratch was a good experience for me: I ran into, and solved, quite a few detail-level problems.
As pointed out in the readme, the main features are as follows:
A pure tensorflow2 implementation
Model configuration and size controlled via yaml files
Support for training on custom data
Mosaic data augmentation
Anchor matching by iou or aspect ratio
Augmenting adjacent grid cells as extra positive samples
Multi-GPU training support
Relatively detailed code comments
Plenty of shortcomings, with huge room for improvement
Principles
Let's briefly review its main principles and improvements alongside the code. There are already quite a few very good analysis articles on Zhihu to refer to, especially the ones below. If possible, reading the code directly should be clearer and more detailed.
江大白: A complete, accessible explanation of Yolov5's core fundamentals (Yolo series)
深度眸: An in-depth visual analysis of yolov5, the rising new wave
Object detection: Yolov5 takes the best from all schools
Model diagram from @江大白
Matching anchors by aspect ratio or iou
The implementation here covers both the v3 way of assigning anchors by iou and the v4/v5 way of assigning them by width-height ratio. The new matching rule is meant to fix the sensitivity problem when an object center sits close to a grid boundary, and it can add a few points of mAP.
def assign_criterion_wh(self, gt_wh, anchors, anchor_threshold):
    # please note that the v5 default anchor_threshold is 4.0, related to the positive sample augment
    gt_wh = tf.expand_dims(gt_wh, 0)  # => 1 * n_gt * 2
    anchors = tf.expand_dims(anchors, 1)  # => n_anchor * 1 * 2
    ratio = gt_wh / anchors  # => n_anchor * n_gt * 2
    matched_matrix = tf.reduce_max(tf.math.maximum(ratio, 1 / ratio), axis=2) < anchor_threshold  # => n_anchor * n_gt
    return matched_matrix

def assign_criterion_iou(self, gt_wh, anchors, anchor_threshold):
    # by IOU, anchor_threshold < 1
    box_wh = tf.expand_dims(gt_wh, 0)  # => 1 * n_gt * 2
    box_area = box_wh[..., 0] * box_wh[..., 1]  # => 1 * n_gt
    anchors = tf.cast(anchors, tf.float32)  # => n_anchor * 2
    anchors = tf.expand_dims(anchors, 1)  # => n_anchor * 1 * 2
    anchors_area = anchors[..., 0] * anchors[..., 1]  # => n_anchor * 1
    inter = tf.math.minimum(anchors[..., 0], box_wh[..., 0]) * tf.math.minimum(anchors[..., 1], box_wh[..., 1])  # => n_anchor * n_gt
    iou = inter / (anchors_area + box_area - inter + 1e-9)
    iou = iou > anchor_threshold
    return iou
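As a quick sanity check of the width-height criterion, here is a hypothetical standalone run; the two boxes are made-up, the anchors are the familiar small-scale coco anchors, and 4.0 is the default anchor_threshold mentioned above:

import tensorflow as tf

# Toy ground-truth sizes (w, h) and one scale's anchors, in pixels.
gt_wh = tf.constant([[10., 13.], [40., 12.]])                  # n_gt * 2
anchors = tf.constant([[10., 13.], [16., 30.], [33., 23.]])    # n_anchor * 2

ratio = tf.expand_dims(gt_wh, 0) / tf.expand_dims(anchors, 1)  # n_anchor * n_gt * 2
matched = tf.reduce_max(tf.math.maximum(ratio, 1. / ratio), axis=2) < 4.0
print(matched.numpy())
# [[ True False]
#  [ True  True]
#  [ True  True]] -> the elongated 40x12 box just fails against the 10x13 anchor (40/10 = 4.0)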
Positive sample augmentation
Balancing positive and negative samples has always been a problem that object detection needs to solve. In v5, after a target is matched to anchors by aspect ratio, the nearest vertical or horizontal neighbors of the matched grid cell are additionally promoted to positive samples. The matched anchor and the target coordinates stay the same as those of the initially matched cell.
def enrich_pos_by_position(self, assigned_label, assigned_anchor, gain, matched_matrix, rect_style='rect4'):
    # use offset to extend more positive results
    assigned_xy = assigned_label[..., 0:2]  # n_matched * 2
    offset = tf.constant([[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]], tf.float32)
    grid_offset = tf.zeros_like(assigned_xy)
    if rect_style == 'rect2':
        g = 0.2  # offset
    elif rect_style == 'rect4':
        g = 0.5
    matched = (assigned_xy % 1. < g) & (assigned_xy > 1.)
    matched_left = matched[:, 0]
    matched_up = matched[:, 1]
    matched = (assigned_xy % 1. > (1 - g)) & (assigned_xy < tf.expand_dims(gain[0:2], 0) - 1.)
    matched_right = matched[:, 0]
    matched_down = matched[:, 1]
    assigned_anchor = tf.concat([assigned_anchor, assigned_anchor[matched_left], assigned_anchor[matched_up],
                                 assigned_anchor[matched_right], assigned_anchor[matched_down]], axis=0)
    assigned_label = tf.concat([assigned_label, assigned_label[matched_left], assigned_label[matched_up],
                                assigned_label[matched_right], assigned_label[matched_down]], axis=0)
    grid_offset = g * tf.concat([grid_offset, grid_offset[matched_left] + offset[1], grid_offset[matched_up] + offset[2],
                                 grid_offset[matched_right] + offset[3], grid_offset[matched_down] + offset[4]], axis=0)
    return assigned_label, assigned_anchor, grid_offset
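To make the neighbor selection concrete, here is a toy check of the two masks above for a single box center; the 80x80 grid and the center (2.3, 5.7) are made-up numbers:

import tensorflow as tf

# A box center at (2.3, 5.7) in grid units, with g = 0.5 as in 'rect4'.
xy = tf.constant([[2.3, 5.7]])
g = 0.5
grid_wh = tf.constant([[80., 80.]])
left_up = (xy % 1. < g) & (xy > 1.)                    # close to the left/top cell border
right_down = (xy % 1. > 1. - g) & (xy < grid_wh - 1.)  # close to the right/bottom border
print(left_up.numpy(), right_down.numpy())
# [[ True False]] [[False  True]] -> besides cell (2, 5), cells (1, 5) and (2, 6) become positive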
Controlling model size via yaml files
This borrows EfficientDet's idea of controlling the model size with two coefficients.
def parse_model(self, yaml_dict):
    anchors, nc = yaml_dict['anchors'], yaml_dict['nc']
    depth_multiple, width_multiple = yaml_dict['depth_multiple'], yaml_dict['width_multiple']
    num_anchors = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors
    output_dims = num_anchors * (nc + 5)
    layers = []
    # from, number, module, args
    for i, (f, number, module, args) in enumerate(yaml_dict['backbone'] + yaml_dict['head']):
        module = eval(module) if isinstance(module, str) else module
        # every component is a class: initialize here, call in self.forward
        for j, arg in enumerate(args):
            try:
                args[j] = eval(arg) if isinstance(arg, str) else arg  # eval strings, like Detect(nc, anchors)
            except:
                pass
        number = max(round(number * depth_multiple), 1) if number > 1 else number  # control the model scale, s/m/l/x
        if module in [Conv2D, Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, BottleneckCSP2, SPPCSP, VoVCSP]:
            c2 = args[0]
            c2 = math.ceil(c2 * width_multiple / 8) * 8 if c2 != output_dims else c2
            args = [c2, *args[1:]]
            if module in [BottleneckCSP, BottleneckCSP2, SPPCSP, VoVCSP]:
                args.insert(1, number)
                number = 1
        modules = tf.keras.Sequential([module(*args) for _ in range(number)]) if number > 1 else module(*args)
        modules.i, modules.f = i, f
        layers.append(modules)
    return layers
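To see the two coefficients at work, take a hypothetical 9-repeat, 256-channel BottleneckCSP entry and apply the official yolov5s values (depth_multiple=0.33, width_multiple=0.50); the arithmetic mirrors the two scaling lines above:

import math

depth_multiple, width_multiple = 0.33, 0.50  # yolov5s coefficients
number, c2 = 9, 256                          # repeats and output channels from the yaml

number = max(round(number * depth_multiple), 1) if number > 1 else number
c2 = math.ceil(c2 * width_multiple / 8) * 8  # keep channels a multiple of 8

print(number, c2)  # 3 128 -> a third of the repeats, half the channels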
Loss function
class YoloLoss(object):
    def __init__(self, anchors, ignore_iou_threshold, num_class, img_size, label_smoothing=0):
        self.anchors = anchors
        self.strides = [8, 16, 32]
        self.ignore_iou_threshold = ignore_iou_threshold
        self.num_class = num_class
        self.img_size = img_size
        self.bce_conf = tf.keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
        self.bce_class = tf.keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE,
                                                            label_smoothing=label_smoothing)

    def __call__(self, y_true, y_pred):
        iou_loss_all = obj_loss_all = class_loss_all = 0
        balance = [1.0, 1.0, 1.0] if len(y_pred) == 3 else [4.0, 1.0, 0.4, 0.1]  # P3-5 or P3-6
        for i, (pred, true) in enumerate(zip(y_pred, y_true)):
            # preprocess, true: batch_size * grid * grid * 3 * 6, pred: batch_size * grid * grid * 3 * (num_class + 5)
            true_box, true_obj, true_class = tf.split(true, (4, 1, -1), axis=-1)
            pred_box, pred_obj, pred_class = tf.split(pred, (4, 1, -1), axis=-1)
            if tf.shape(true_class)[-1] == 1 and self.num_class > 1:
                true_class = tf.squeeze(tf.one_hot(tf.cast(true_class, tf.dtypes.int32), depth=self.num_class, axis=-1), -2)
            # prepare: higher weights to smaller box, true_wh should be normalized to (0,1)
            box_scale = 2 - 1.0 * true_box[..., 2] * true_box[..., 3] / (self.img_size ** 2)
            obj_mask = tf.squeeze(true_obj, -1)  # obj or noobj, batch_size * grid * grid * anchors_per_grid
            background_mask = 1.0 - obj_mask
            conf_focal = tf.squeeze(tf.math.pow(true_obj - pred_obj, 2), -1)
            # iou/giou/ciou/diou loss
            iou = bbox_iou(pred_box, true_box, xyxy=False, giou=True)
            iou_loss = (1 - iou) * obj_mask * box_scale  # batch_size * grid * grid * 3
            # confidence loss, Todo: multiply the iou
            conf_loss = self.bce_conf(true_obj, pred_obj)
            conf_loss = conf_focal * (obj_mask * conf_loss + background_mask * conf_loss)  # batch * grid * grid * 3
            # class loss
            # use binary cross entropy loss for multi class, so every value is independent and sigmoid
            # please note that the output of tf.keras.losses.bce is the original dim minus the last one
            class_loss = obj_mask * self.bce_class(true_class, pred_class)
            iou_loss = tf.reduce_mean(tf.reduce_sum(iou_loss, axis=[1, 2, 3]))
            conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1, 2, 3]))
            class_loss = tf.reduce_mean(tf.reduce_sum(class_loss, axis=[1, 2, 3]))
            iou_loss_all += iou_loss * balance[i]
            obj_loss_all += conf_loss * balance[i]
            class_loss_all += class_loss * self.num_class * balance[i]  # to balance the 3 loss
        return iou_loss_all, obj_loss_all, class_loss_all
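To see what the box_scale term does, here is a tiny standalone check; the 640 image size and the two box sizes are made-up values:

import tensorflow as tf

# box_scale = 2 - w * h / img_size**2 gives smaller boxes a larger iou-loss weight.
img_size = 640.
wh = tf.constant([[32., 32.], [320., 320.]])  # a small and a large box, in pixels
box_scale = 2. - wh[:, 0] * wh[:, 1] / img_size ** 2
print(box_scale.numpy())  # [1.9975 1.75] -> the 32x32 box is weighted about 14% higher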
The loss function is not yet fully identical to v5's settings. v5 adds several optimizations, such as balancing the loss across scales and weighting the target confidence loss.
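For reference, the "Todo: multiply the iou" comment above points at one of those optimizations: the pytorch v5 replaces the hard objectness target of 1 with the predicted box quality. A minimal sketch, assuming per-positive iou values and v5's blending ratio gr (1.0 by default):

import tensorflow as tf

gr = 1.0                            # v5's objectness-iou blending ratio
iou = tf.constant([0.9, 0.6, 0.3])  # toy giou/ciou values of three positive samples
target_obj = (1.0 - gr) + gr * tf.clip_by_value(iou, 0., 1.)
print(target_obj.numpy())  # [0.9 0.6 0.3] -> well-localized boxes get higher confidence targets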
Results
If you want the best results, the original pytorch version is still the one to recommend; it is constantly updated, and the authors of v4 and v5 keep optimizing it. But if you have an inexplicable fondness for tensorflow, or want to understand yolov5 through code, I think my version is written more clearly (the corresponding cost being that details may be missing or even wrong). In any case, everyone is welcome to try it. Results on the MNIST detection data:

Results on the voc2012 dataset (still room for improvement):

Anything bigger and I can't run it; after all, I only have a 1080Ti.