【YOLOv5 v6.0 源码解析】--- utils/augmentations.py
yolov5 数据增强代码
主要有以下⼏种⽅式:
class Albumentations  # 数据增强 package,比 PyTorch 自带的 transform 更丰富
def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5)  # HSV 图像增强:hgain 是色调增益(不同色调对应不同颜色),sgain 是饱和度增益,vgain 是亮度增益
def hist_equalize(im, clahe=True, bgr=False)  # 采用(自适应)直方图均衡化做图像增强
def replicate(im, labels)  # 将较小的标注框复制(平移粘贴)到图像的随机位置
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32)  # 按比例缩放图像并填充至指定大小
def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
border=(0, 0))  # 随机透视/仿射变换增强
def copy_paste(im, labels, segments, p=0.5)  # 复制粘贴
def cutout(im, labels, p=0.5)  # 随机遮挡(cutout)
def mixup(im, labels, im2, labels2)  # mixup
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16)  # 框筛选
下⾯⼀个个来看图像增强的⽅式:
Albumentations 图像增强
class Albumentations:
    """Optional albumentations pipeline (only active if the package is installed).

    When ``albumentations`` cannot be imported, or building the pipeline
    fails, ``self.transform`` stays ``None`` and calling the instance returns
    its inputs unchanged.
    """

    def __init__(self):
        # Default to "disabled" so __call__ is safe when the import below fails.
        self.transform = None
        try:
            # albumentations is a data-augmentation package with a richer set
            # of transforms than the ones bundled with PyTorch; see
            # blog.csdn/cp1314971/article/details/106039800 for an overview.
            import albumentations as A
            check_version(A.__version__, '1.0.3')  # version requirement

            self.transform = A.Compose([
                A.Blur(p=0.01),  # blur with a randomly sized kernel
                A.MedianBlur(p=0.01),  # median blur
                A.ToGray(p=0.01),  # convert to grayscale
                A.CLAHE(p=0.01),  # contrast-limited adaptive histogram equalization
                A.RandomBrightnessContrast(p=0.0),  # random brightness/contrast (disabled, p=0)
                A.RandomGamma(p=0.0),  # random gamma (disabled, p=0)
                A.ImageCompression(quality_lower=75, p=0.0)],  # compression artifacts (disabled, p=0)
                bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

            # Report only the transforms that can actually fire (p > 0).
            logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))
        except ImportError:  # package not installed, skip
            pass
        except Exception as e:
            logging.info(colorstr('albumentations: ') + f'{e}')

    def __call__(self, im, labels, p=1.0):
        """Apply the pipeline to an image and its labels with probability ``p``.

        Args:
            im: image passed to the pipeline as ``image``.
            labels: 2-D array; column 0 is passed as ``class_labels`` and the
                remaining columns as ``bboxes`` (the pipeline is configured
                with ``format='yolo'``).
            p: probability of applying the pipeline.

        Returns:
            ``(im, labels)``: transformed when the pipeline ran, otherwise the
            inputs unchanged.
        """
        if self.transform and random.random() < p:
            new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0])  # transformed
            # Re-assemble one [class, *bbox] row per surviving box.
            im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])])
        return im, labels
hsv ⾊调-饱和度-亮度的图像增强
def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
    """Randomly jitter hue, saturation and value of a BGR image, in place.

    Args:
        im: BGR image; it is overwritten with the augmented result.
        hgain: maximum fractional change of the hue channel.
        sgain: maximum fractional change of the saturation channel.
        vgain: maximum fractional change of the value (brightness) channel.

    Returns:
        None. The result is written into ``im``. Nothing happens when all
        three gains are zero.
    """
    if hgain or sgain or vgain:
        # Three samples from [-1, 1) scaled by the gains and shifted by 1, i.e.
        # one multiplicative factor per channel centred on 1.
        r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
        hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))  # BGR -> HSV, one array per channel
        dtype = im.dtype  # uint8

        # Build one 256-entry lookup table per channel instead of scaling
        # every pixel individually.
        x = np.arange(0, 256, dtype=r.dtype)
        lut_hue = ((x * r[0]) % 180).astype(dtype)  # hue wraps around within [0, 180)
        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)  # clamp to [0, 255]
        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)  # clamp to [0, 255]

        # cv2.LUT maps every pixel through its table; merge re-stacks the
        # three channels into one HSV image.
        im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
        cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im)  # no return needed
直⽅图均衡化增强
def hist_equalize(im, clahe=True, bgr=False):
    """Equalize the luminance histogram of a 3-channel image.

    Args:
        im: image with shape (n, m, 3) and values in 0-255.
        clahe: if True use contrast-limited adaptive (tile-based)
            equalization, otherwise global equalization.
        bgr: True if ``im`` is in BGR channel order, False for RGB.

    Returns:
        A new image in the same channel order as the input.
    """
    yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
    if clahe:
        # Adaptive equalization works per 8x8 tile with a clip limit.
        c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        yuv[:, :, 0] = c.apply(yuv[:, :, 0])
    else:
        # Global equalization over the whole Y channel.
        yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0])  # equalize Y channel histogram
    return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB)  # convert YUV back to BGR/RGB
图像框的平移复制增强
def replicate(im, labels):
    """Copy the smaller half of the labelled boxes to random image locations.

    Args:
        im: image array; it is modified in place.
        labels: 2-D array with rows ``[cls, x1, y1, x2, y2]`` in pixels.

    Returns:
        ``(im, labels)`` where ``labels`` has one extra row appended for each
        replicated box (same class, new coordinates).
    """
    h, w = im.shape[:2]  # image height and width
    boxes = labels[:, 1:].astype(int)  # integer pixel coordinates of every box
    x1, y1, x2, y2 = boxes.T
    s = ((x2 - x1) + (y2 - y1)) / 2  # side length (pixels)
    for i in s.argsort()[:round(s.size * 0.5)]:  # indices of the smallest half
        x1b, y1b, x2b, y2b = boxes[i]
        bh, bw = y2b - y1b, x2b - x1b
        # Random top-left corner chosen so the copy stays inside the image.
        yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw))  # offset x, y
        x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
        im[y1a:y2a, x1a:x2a] = im[y1b:y2b, x1b:x2b]  # paste the box pixels at the new location
        labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)

    return im, labels
图像以letterbox缩放
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize an image keeping its aspect ratio and pad it to the target shape.

    Args:
        im: input image; ``im.shape[:2]`` is read as (height, width).
        new_shape: target (height, width), or a single int for a square.
        color: constant border colour used for the padding.
        auto: if True, pad only by the remainder modulo ``stride`` instead of
            padding all the way to ``new_shape`` (minimum rectangle).
        scaleFill: if True (and ``auto`` is False), stretch the image to
            ``new_shape`` with no padding.
        scaleup: if False, the image is only ever shrunk, never enlarged.
        stride: modulus used when ``auto`` is True.

    Returns:
        ``(im, ratio, (dw, dh))``: the padded image, the (width, height)
        scale ratios, and the padding added on each side horizontally and
        vertically (these may be fractional).
    """
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):  # a single number means a square target
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old): the smaller of the two so the image fits.
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # scaled size as (width, height)
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # total wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # keep only the remainder modulo stride
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The -0.1 / +0.1 offsets make a half-pixel padding round down on one side
    # and up on the other, so the two sides still sum to the total padding.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)
旋转等变换(未更新完全,后续补充)
def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
                       border=(0, 0)):
    """Apply a random perspective/affine warp to an image and its labels.

    Roughly equivalent to
    ``torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1),
    scale=(.9, 1.1), shear=(-10, 10))`` with an optional perspective term.

    Args:
        im: input image with shape (h, w, c).
        targets: array with rows ``[cls, x1, y1, x2, y2]``.
        segments: per-target polygons; used instead of the boxes when any of
            them contains non-zero points.
        degrees: maximum rotation in degrees (sampled from +/- this value).
        translate: maximum translation as a fraction of the output size.
        scale: maximum scale change (factor sampled from ``1 +/- scale``).
        shear: maximum shear angle in degrees for each axis.
        perspective: maximum magnitude of the perspective coefficients; 0
            selects a purely affine warp.
        border: (y, x) border added on each side of the output canvas
            (negative values shrink it).

    Returns:
        ``(im, targets)``: the warped image and the targets that passed
        ``box_candidates`` with updated box coordinates.
    """
    height = im.shape[0] + border[0] * 2  # shape(h,w,c)
    width = im.shape[1] + border[1] * 2

    # Center: move the image centre to the origin.
    #   [1 0 -w/2]
    #   [0 1 -h/2]
    #   [0 0   1 ]
    C = np.eye(3)
    C[0, 2] = -im.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -im.shape[0] / 2  # y translation (pixels)

    # Perspective: random coefficients in the bottom row.
    #   [ 1    0   0]
    #   [ 0    1   0]
    #   [rand rand 1]
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    s = random.uniform(1 - scale, 1 + scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation: move the origin back to (roughly) the output centre.
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

    # Combined matrix
    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        if perspective:
            # Perspective warp: lines stay straight, parallels may converge.
            im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
        else:  # affine
            # Affine warp (rotation, translation, scale, shear): parallels stay parallel.
            im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Transform label coordinates
    n = len(targets)
    if n:
        use_segments = any(x.any() for x in segments)
        new = np.zeros((n, 4))
        if use_segments:  # warp segments
            segments = resample_segments(segments)  # upsample
            for i, segment in enumerate(segments):
                xy = np.ones((len(segment), 3))  # homogeneous coordinates
                xy[:, :2] = segment
                xy = xy @ M.T  # transform
                xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]  # perspective rescale or affine

                # clip
                new[i] = segment2box(xy, width, height)

        else:  # warp boxes
            xy = np.ones((n * 4, 3))  # four homogeneous corners per box
            xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
            xy = xy @ M.T  # transform
            xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine

            # create new boxes: axis-aligned envelope of the warped corners
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

            # clip
            new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
            new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)

        # filter candidates: compare the scaled originals with the warped boxes
        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
        targets = targets[i]
        targets[:, 1:5] = new[i]

    return im, targets
复制粘贴
def copy_paste(im, labels, segments, p=0.5):
    """Copy-Paste augmentation (arxiv/abs/2012.07177) using horizontally flipped objects.

    Args:
        im: image with shape (h, w, c); modified in place.
        labels: nx5 array with rows ``(cls, x1, y1, x2, y2)``.
        segments: list of per-object polygons as (k, 2) arrays; new polygons
            are appended to this list in place.
        p: fraction of the objects to try to copy.

    Returns:
        ``(im, labels, segments)`` with the pasted objects added. The inputs
        are returned untouched when ``p`` is 0 or there are no segments.
    """
    n = len(segments)
    if p and n:
        h, w, c = im.shape  # height, width, channels
        im_new = np.zeros(im.shape, np.uint8)  # mask of the objects selected for pasting
        for j in random.sample(range(n), k=round(p * n)):
            l, s = labels[j], segments[j]
            box = w - l[3], l[2], w - l[1], l[4]  # box mirrored left-right
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            if (ioa < 0.30).all():  # allow 30% obscuration of existing labels
                labels = np.concatenate((labels, [[l[0], *box]]), 0)
                segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))  # mirrored polygon
                cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)

        result = cv2.bitwise_and(src1=im, src2=im_new)  # keep only the selected object pixels
        result = cv2.flip(result, 1)  # augment segments (flip left-right)
        i = result > 0  # pixels to replace
        im[i] = result[i]

    return im, labels, segments
图⽚随机马赛克
def cutout(im, labels, p=0.5):
    """Cutout augmentation (arxiv/abs/1708.04552): paint random solid rectangles.

    Args:
        im: image array; modified in place.
        labels: array with rows ``(cls, x1, y1, x2, y2)``.
        p: probability of applying the augmentation.

    Returns:
        The labels that are still sufficiently visible after masking (all of
        them when the augmentation is skipped).
    """
    if random.random() < p:
        h, w = im.shape[:2]
        # Mask sizes as a fraction of the image size: 1 large mask, then
        # progressively more and smaller ones (31 masks in total).
        scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16  # image size fraction
        for s in scales:
            # max(1, ...) keeps the range valid on tiny images, where
            # int(h * s) can be 0 and randint(1, 0) would raise ValueError.
            mask_h = random.randint(1, max(1, int(h * s)))  # create random masks
            mask_w = random.randint(1, max(1, int(w * s)))

            # box: random centre, clipped to the image bounds
            xmin = max(0, random.randint(0, w) - mask_w // 2)
            ymin = max(0, random.randint(0, h) - mask_h // 2)
            xmax = min(w, xmin + mask_w)
            ymax = min(h, ymin + mask_h)

            # apply random color mask
            im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

            # return unobscured labels (only the larger masks are checked)
            if len(labels) and s > 0.03:
                box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
                ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area between the mask and each label
                labels = labels[ioa < 0.60]  # remove >60% obscured labels

    return labels
mixup