首页 > 美文阅读

【yolov5 6.0源码解析】---utils/augmentations.py

更新时间:2023-06-10 15:55:58 阅读：评论：0

yolov5 数据增强代码

主要有以下几种方式：

class Albumentations # 数据增强package，比pytorch自带的transform更丰富

def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5) # 图像增强方式：hgain是色调（不同色调对应不同颜色），sgain是饱和度，vgain是亮度

def hist_equalize(im, clahe=True, bgr=False) # 采用自适应直方图均衡化做图像增强

def replicate(im, labels) # 复制较小的目标框并粘贴到图像的随机位置

def letterbox(im, new_shape=(640,640), color=(114,114,114), auto=True, scaleFill=False, scaleup=True, stride=32) # 按比例缩放图像并填充至指定大小

def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,

border=(0,0)) # 随机透视/仿射变换增强

def copy_paste(im, labels, segments, p=0.5) # 复制粘贴

def cutout(im, labels, p=0.5) # 随机遮挡（cutout）

def mixup(im, labels, im2, labels2) # mixup

def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16) # 框筛选

下面一个个来看图像增强的方式：

Albumentations 图像增强

class Albumentations:
    """Optional Albumentations pipeline for YOLOv5 (only used if the package is installed).

    Albumentations is a third-party augmentation package offering a richer set of
    transforms than the ones bundled with PyTorch/torchvision (background reading
    cited by the article: CSDN post cp1314971/article/details/106039800).

    When the package cannot be imported, or building the pipeline fails, the
    instance keeps ``self.transform = None`` and calling it returns its inputs
    unchanged.
    """

    def __init__(self):
        # Must exist even when the import below fails, because __call__ tests it.
        self.transform = None
        try:
            import albumentations as A
            check_version(A.__version__, '1.0.3')  # version requirement

            self.transform = A.Compose([
                A.Blur(p=0.01),  # blur the input with a randomly sized kernel
                A.MedianBlur(p=0.01),  # median blur
                A.ToGray(p=0.01),  # convert to grayscale
                A.CLAHE(p=0.01),  # contrast-limited adaptive histogram equalization
                A.RandomBrightnessContrast(p=0.0),  # random brightness/contrast (disabled: p=0)
                A.RandomGamma(p=0.0),  # random gamma (disabled: p=0)
                A.ImageCompression(quality_lower=75, p=0.0)],  # image compression (disabled: p=0)
                bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

            # Log only the transforms that can actually fire (non-zero probability).
            logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))
        except ImportError:  # package not installed, skip
            pass
        except Exception as e:
            logging.info(colorstr('albumentations: ') + f'{e}')

    def __call__(self, im, labels, p=1.0):
        """Apply the pipeline to an image and its labels with probability ``p``.

        Args:
            im: Image passed to the pipeline as ``image``.
            labels: 2-D array; column 0 holds the class and columns 1: hold the
                box, passed to the pipeline in the 'yolo' bbox format configured
                in ``__init__``.
            p: Probability of applying the pipeline on this call.

        Returns:
            ``(im, labels)`` -- transformed when the pipeline ran, otherwise the
            inputs unchanged.
        """
        if self.transform and random.random() < p:
            new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0])  # transformed
            # Re-assemble rows as [class, *box] from the pipeline output.
            im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])])
        return im, labels

hsv 色调-饱和度-亮度的图像增强

def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
    """Randomly jitter hue, saturation and value of a BGR image, in place.

    Args:
        im: BGR image (converted with ``cv2.COLOR_BGR2HSV``); overwritten with the result.
        hgain: Maximum relative hue change.
        sgain: Maximum relative saturation change.
        vgain: Maximum relative value (brightness) change.

    Returns:
        None. The result is written into ``im``; nothing happens when all gains are 0.
    """
    # HSV color-space augmentation
    if hgain or sgain or vgain:
        # One multiplicative factor per channel: 1 + U[-1, 1) * gain.
        r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
        hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))  # BGR -> HSV, then split channels
        dtype = im.dtype  # uint8

        x = np.arange(0, 256, dtype=r.dtype)  # every possible input value 0..255
        lut_hue = ((x * r[0]) % 180).astype(dtype)  # hue wraps around into [0, 180)
        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)  # saturate to [0, 255]
        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)  # saturate to [0, 255]

        # cv2.LUT maps each pixel through the lookup table built above; merge re-joins the three channels.
        im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
        cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im)  # no return needed

直方图均衡化增强

def hist_equalize(im, clahe=True, bgr=False):
    """Equalize the luma histogram of a 3-channel image with values in 0-255.

    The image is converted to YUV, only the Y channel is equalized, and the
    result is converted back to the input channel order.
    Reference cited by the article (path as given there): /my-love-is-python/p/10405811.html

    Args:
        im: Image of shape (n, m, 3).
        clahe: If True use CLAHE (adaptive, tile-based equalization); otherwise
            use global equalization via ``cv2.equalizeHist``.
        bgr: True if ``im`` is BGR, False if it is RGB.

    Returns:
        The equalized image in the same channel order as the input.
    """
    # Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255
    yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
    if clahe:
        # Adaptive (local) equalization with a clip limit, applied per 8x8 tile grid.
        c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        yuv[:, :, 0] = c.apply(yuv[:, :, 0])
    else:
        # Global equalization over the whole image.
        yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0])  # equalize Y channel histogram
    return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB)  # convert YUV image back

图像框的平移复制增强

def replicate(im, labels):
    """Copy the smaller half of the labelled boxes to random positions in the image.

    Args:
        im: Image array indexed as ``im[y, x]``; modified in place.
        labels: 2-D array with rows ``[cls, x1, y1, x2, y2]`` in pixels.

    Returns:
        ``(im, labels)`` where ``labels`` has one extra row per pasted box.
    """
    # Replicate labels
    h, w = im.shape[:2]  # image height and width
    boxes = labels[:, 1:].astype(int)  # box corners as integer pixels
    x1, y1, x2, y2 = boxes.T
    s = ((x2 - x1) + (y2 - y1)) / 2  # side length (pixels)
    for i in s.argsort()[:round(s.size * 0.5)]:  # smallest indices
        x1b, y1b, x2b, y2b = boxes[i]
        bh, bw = y2b - y1b, x2b - x1b
        # Random top-left corner chosen so the pasted box stays inside the image.
        yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw))  # offset x, y
        x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
        im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b]  # im4[ymin:ymax, xmin:xmax]
        labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)

    return im, labels

图像以letterbox缩放

def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize an image keeping its aspect ratio and pad the remainder with a constant color.

    Args:
        im: Image array; ``im.shape[:2]`` is (height, width).
        new_shape: Target (height, width), or a single int for a square target.
        color: Border color used for the padding.
        auto: If True, reduce the padding to its remainder modulo ``stride``
            (minimum rectangle) instead of padding all the way to ``new_shape``.
        scaleFill: If True (and ``auto`` is False), stretch to ``new_shape`` with no padding.
        scaleup: If False, never enlarge the image (only scale down).
        stride: Stride used by the ``auto`` padding reduction.

    Returns:
        ``(im, ratio, (dw, dh))``: the output image, the (width, height) scale
        ratios, and the per-side padding in pixels (may be fractional).
    """
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):  # a single number means a square target
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # scaled size (width, height)
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding needed to reach new_shape
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding: keep only the remainder modulo stride
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)  # scale to the unpadded size first
    # The -0.1/+0.1 offsets send an odd pixel of padding to the bottom/right side.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)

旋转等变换（未更新完全，后续补充）

def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
                       border=(0, 0)):
    """Apply a random perspective/affine warp to an image and its labels.

    Args:
        im: Image of shape (h, w, c).
        targets: Array with rows ``[cls, x1, y1, x2, y2]`` (empty for no labels).
        segments: Per-target polygon arrays; used instead of the boxes when any is non-empty.
        degrees: Rotation is drawn from [-degrees, degrees].
        translate: Translation is drawn from [0.5 - translate, 0.5 + translate] times the output size.
        scale: Scale is drawn from [1 - scale, 1 + scale].
        shear: Shear angles (degrees) are drawn from [-shear, shear].
        perspective: Perspective terms are drawn from [-perspective, perspective];
            0 selects the cheaper affine warp.
        border: (y, x) border added to each side of the output size (may be negative).

    Returns:
        ``(im, targets)``: the warped image and the warped, clipped and filtered targets.
    """
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # targets = [cls, xyxy]

    height = im.shape[0] + border[0] * 2  # shape(h,w,c)
    width = im.shape[1] + border[1] * 2

    # Center: move the image center to the origin
    #   [1 0 -w/2]
    #   [0 1 -h/2]
    #   [0 0   1 ]
    C = np.eye(3)
    C[0, 2] = -im.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -im.shape[0] / 2  # y translation (pixels)

    # Perspective: random terms in the bottom row
    #   [ 1    0   0]
    #   [ 0    1   0]
    #   [rand rand 1]
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    # s = 2 ** random.uniform(-scale, scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

    # Combined rotation matrix
    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        # warpAffine: rotation/translation/scale/shear, parallel lines stay parallel.
        # warpPerspective: straight lines stay straight, but parallel lines may not stay parallel.
        if perspective:
            im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
        else:  # affine
            im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Visualize
    # import matplotlib.pyplot as plt
    # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
    # ax[0].imshow(im[:, :, ::-1])  # base
    # ax[1].imshow(im2[:, :, ::-1])  # warped

    # Transform label coordinates
    n = len(targets)
    if n:
        use_segments = any(x.any() for x in segments)
        new = np.zeros((n, 4))
        if use_segments:  # warp segments
            # resample_segments / segment2box are defined elsewhere in the module.
            segments = resample_segments(segments)  # upsample
            for i, segment in enumerate(segments):
                xy = np.ones((len(segment), 3))  # homogeneous coordinates
                xy[:, :2] = segment
                xy = xy @ M.T  # transform
                xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]  # perspective rescale or affine

                # clip
                new[i] = segment2box(xy, width, height)

        else:  # warp boxes
            xy = np.ones((n * 4, 3))  # four corners per box, homogeneous coordinates
            xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
            xy = xy @ M.T  # transform
            xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine

            # create new boxes: axis-aligned bounds of the four warped corners
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

            # clip
            new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
            new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)

        # filter candidates (box_candidates is defined elsewhere; original boxes are scaled by s for the comparison)
        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
        targets = targets[i]
        targets[:, 1:5] = new[i]

    return im, targets

复制粘贴

def copy_paste(im, labels, segments, p=0.5):
    """Copy-Paste augmentation (https://arxiv.org/abs/2012.07177).

    A random subset of the segmented objects is mirrored left-right and pasted
    back into the image wherever the mirrored box does not cover existing labels
    too much.

    Args:
        im: Image of shape (h, w, c); modified in place.
        labels: nx5 array with rows ``(cls, x1, y1, x2, y2)``.
        segments: List of per-object polygon arrays (x in column 0, y in column 1);
            mirrored polygons are appended to this list in place.
        p: Fraction of the objects to try to copy; 0 disables the augmentation.

    Returns:
        ``(im, labels, segments)`` including the pasted objects.
    """
    # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
    n = len(segments)
    if p and n:
        h, w, c = im.shape  # height, width, channels
        im_new = np.zeros(im.shape, np.uint8)  # mask of the objects selected for copying
        for j in random.sample(range(n), k=round(p * n)):
            l, s = labels[j], segments[j]
            box = w - l[3], l[2], w - l[1], l[4]  # the box mirrored left-right
            # bbox_ioa is defined elsewhere in the module.
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            if (ioa < 0.30).all():  # allow 30% obscuration of existing labels
                labels = np.concatenate((labels, [[l[0], *box]]), 0)
                segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))  # mirrored polygon
                cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)

        result = cv2.bitwise_and(src1=im, src2=im_new)  # keep only the selected objects' pixels
        result = cv2.flip(result, 1)  # augment segments (flip left-right)
        i = result > 0  # pixels to replace
        # i[:, :] = result.max(2).reshape(h, w, 1)  # act over ch
        im[i] = result[i]  # cv2.imwrite('debug.jpg', im)  # debug

    return im, labels, segments

图片随机马赛克

def cutout(im, labels, p=0.5):
    """Cutout augmentation (https://arxiv.org/abs/1708.04552): paint random solid-color patches.

    Args:
        im: Image array indexed as ``im[y, x]`` with 3 channels; modified in place.
        labels: Array with rows ``(cls, x1, y1, x2, y2)``; may be empty.
        p: Probability of applying the augmentation on this call.

    Returns:
        The labels that remain after dropping those more than 60% covered by one
        of the larger patches (the input labels unchanged if nothing was applied).
    """
    # Applies image cutout augmentation https://arxiv.org/abs/1708.04552
    if random.random() < p:
        h, w = im.shape[:2]
        # 31 image size fractions: 0.5 x1, 0.25 x2, 0.125 x4, 0.0625 x8, 0.03125 x16
        scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
        for s in scales:
            # max(1, ...) keeps randint's range valid on images smaller than 1/s pixels.
            mask_h = random.randint(1, max(1, int(h * s)))  # create random masks
            mask_w = random.randint(1, max(1, int(w * s)))

            # box: random position of the patch, clipped to the image
            xmin = max(0, random.randint(0, w) - mask_w // 2)
            ymin = max(0, random.randint(0, h) - mask_h // 2)
            xmax = min(w, xmin + mask_w)
            ymax = min(h, ymin + mask_h)

            # apply random color mask
            im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

            # return unobscured labels (the smallest patches, s = 0.03125, are skipped)
            if len(labels) and s > 0.03:
                box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
                # bbox_ioa is defined elsewhere in the module.
                ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
                labels = labels[ioa < 0.60]  # remove >60% obscured labels

    return labels

mixup

本文发布于:2023-06-10 15:55:58，感谢您对本站的认可！

本文链接：https://www.wtabcd.cn/fanwen/fan/82/920423.html

上一篇：大学生社会实践报告1500字大学生社会实践报告字(十三篇)

下一篇：最新缅怀革命先烈的演讲稿500字缅怀革命先烈的演讲稿1000字(7篇)

标签：均衡化图像函数位置变换平移缩放

留言与评论（共有 0 条评论）