数据增强之RandAugment
摘要
近期研究表明,数据增广可以显著提高深度学习的泛化性能,尤其是在图像分类和目标检测方面均取得了不错的成果。这些策略的主要目的是提升精度,与此同时,在半监督机制下,由于对原有数据集进行了扩充,也增强了模型对图像扰动的鲁棒性。
常见的图像识别任务中,增⼴的过程⼀般都是作为预处理阶段的任务之⼀。往往由于数据集过⼤⽽造成极⼤的计算损耗障碍。此外,由于所处的阶段不同,这些⽅法⽆法像模型算法⼀样随意调整正则化强度(尽管数据增⼴的效果直接取决于模型和数据集的⼤⼩)。
传统⾃动数据增⼴的策略通常是基于⼩数据集在轻量模型上训练后再应⽤于更⼤的模型。这就在策略上造成了⼀定的限制约束。本⽂则解决了这两⼤限制。RandAugment可以将数据增⼴所产⽣的增量样本空间⼤⼤缩⼩,从⽽使其可与模型训练过程捆绑在⼀起完成,避免将其作为独⽴的预处理任务来完成。此外,本⽂设置了增⼴强度的正则化参数,可以根据不同的模型和数据集⼤⼩进⾏调整。RandAugment⽅法可以作为外置⼯具作⽤于不同的图像处理任务、数据集⼯作中。
在CIFAR-10/100、SVHN和ImageNet数据集上,RandAugment能持平甚至优于先前的自动数据增广方法。在ImageNet数据集上,Baseline采用EfficientNet-B7结构的精度为84%,而AutoAugment+Baseline的精度为84.4%,本文的RandAugment+Baseline则达到了85.0%的准确率,相对二者分别提升了1.0和0.6个百分点。在目标检测方面,Baseline采用ResNet结构,添加RandAugment的效果较Baseline和其他增广方法提高了1.0~1.3个百分点。在COCO数据集上的表现也有0~0.3% mAP的效果提升。最后,由于本文超参数的可解释性,RandAugment可以用来研究数据增广的作用与模型、数据集大小之间的关系。
RandAugment
考虑到以往数据增强⽅法都包含30多个参数,团队也将关注点转移到了如何⼤幅减少数据增强的参数空间。
为了减少参数空间的同时保持数据(图像)的多样性,研究⼈员⽤⽆参数过程替代了学习的策略和概率。
这些策略和概率原本适用于每次变换(transformation),而该过程始终以1/K的均匀概率从K个可选变换中选取变换。
也就是说,给定训练图像的N个变换,RandAugment就能表示$K^N$个潜在策略。
最后,需要考虑到的⼀组参数是每个增强失真(augmentation distortion)的⼤⼩。
研究⼈员采⽤线性标度来表⽰每个转换的强度。简单来说,就是每次变换都在0到10的整数范围内,其中,10表⽰给定变换的最⼤范围。
并假设⼀个单⼀的全局失真M(global distortion M)可能就⾜以对所有转换进⾏参数化。
这样,⽣成的算法便包含两个参数N和M,还可以⽤两⾏Python代码简单表⽰:
因为这两个参数都是可⼈为解释的,所以N和M的值越⼤,正则化强度就越⼤。
可以使用标准方法高效地进行超参数优化,但是考虑到极小的搜索空间,研究人员发现朴素网格搜索(naive grid search)是非常有效的。
实验结果
代码
import cv2
import numpy as np
import cv2
## aug functions
def identity_func(img):
    """Return ``img`` unchanged (the no-op augmentation)."""
    return img
def autocontrast_func(img, cutoff=2):
    """Stretch each channel's intensity range to the full 0-255 scale.

    Intended to give the same output as ``PIL.ImageOps.autocontrast``.

    Args:
        img: Image array accepted by ``cv2.split``. Pixel values are used as
            indices into a 256-entry lookup table (presumably uint8 --
            confirm with the caller).
        cutoff: Percentage of pixels to discard at each end of the histogram
            before locating the darkest and brightest levels.

    Returns:
        The per-channel remapped image, recombined with ``cv2.merge``.
    """
    n_bins = 256

    def tune_channel(ch):
        # Number of pixels to ignore at each end of the histogram.
        cut = cutoff * ch.size // 100
        if cut == 0:
            # Cast to plain ints: negating a NumPy uint8 scalar wraps around
            # (-10 becomes 246), which would corrupt the offset below.
            high, low = int(ch.max()), int(ch.min())
        else:
            hist = cv2.calcHist([ch], [0], None, [n_bins], [0, n_bins])
            # First level whose cumulative count exceeds the cut, from below.
            low_hits = np.argwhere(np.cumsum(hist) > cut)
            low = 0 if low_hits.shape[0] == 0 else int(low_hits[0, 0])
            # Same search from the bright end of the histogram.
            high_hits = np.argwhere(np.cumsum(hist[::-1]) > cut)
            if high_hits.shape[0] == 0:
                high = n_bins - 1
            else:
                high = n_bins - 1 - int(high_hits[0, 0])
        if high <= low:
            # Degenerate range: keep the channel as it is.
            table = np.arange(n_bins)
        else:
            scale = (n_bins - 1) / (high - low)
            offset = -low * scale
            table = np.arange(n_bins) * scale + offset
        table = table.clip(0, n_bins - 1).astype(np.uint8)
        return table[ch]

    channels = [tune_channel(ch) for ch in cv2.split(img)]
    out = cv2.merge(channels)
    return out
def equalize_func(img):
    """Equalize the histogram of every channel independently.

    Intended to give the same output as ``PIL.ImageOps.equalize``; the
    original author notes that PIL's algorithm differs from OpenCV's own
    histogram equalization.

    Args:
        img: Image array accepted by ``cv2.split``. Pixel values are used as
            indices into a 256-entry lookup table (presumably uint8 --
            confirm with the caller).

    Returns:
        The equalized channels recombined with ``cv2.merge``.
    """
    n_bins = 256

    def tune_channel(ch):
        hist = cv2.calcHist([ch], [0], None, [n_bins], [0, n_bins])
        non_zero_hist = hist[hist != 0].reshape(-1)
        # The step ignores the last populated bin.
        step = np.sum(non_zero_hist[:-1]) // (n_bins - 1)
        if step == 0:
            # Nothing to spread out (e.g. a single populated level).
            return ch
        # Histogram shifted right by one bin, seeded with half a step.
        shifted = np.empty_like(hist)
        shifted[0] = step // 2
        shifted[1:] = hist[:-1]
        table = (np.cumsum(shifted) // step).clip(0, 255).astype(np.uint8)
        return table[ch]

    channels = [tune_channel(ch) for ch in cv2.split(img)]
    out = cv2.merge(channels)
    return out
def rotate_func(img, degree, fill=(0, 0, 0)):
    """Rotate ``img`` about its centre, keeping the original canvas size.

    Like PIL, the angle is given in degrees, not radians.

    Args:
        img: Image array; only ``shape[0]`` (height) and ``shape[1]``
            (width) are read here.
        degree: Rotation angle in degrees, passed to
            ``cv2.getRotationMatrix2D``.
        fill: Border value used for pixels that have no source pixel.

    Returns:
        The rotated image produced by ``cv2.warpAffine``.
    """
    H, W = img.shape[0], img.shape[1]
    center = W / 2, H / 2
    # Scale factor 1: pure rotation, no zoom.
    M = cv2.getRotationMatrix2D(center, degree, 1)
    out = cv2.warpAffine(img, M, (W, H), borderValue=fill)
    return out
def solarize_func(img, thresh=128):
    """Invert every pixel value that is at or above ``thresh``.

    Values below ``thresh`` are kept; a value ``v`` at or above it becomes
    ``255 - v``. This mirrors ``PIL.ImageOps.solarize`` (the previous
    docstring named ``posterize`` by mistake).

    Args:
        img: Integer array whose values index a 256-entry lookup table.
        thresh: Level at which inversion starts.

    Returns:
        A uint8 array with the same shape as ``img``.
    """
    levels = np.arange(256)
    table = np.where(levels < thresh, levels, 255 - levels)
    table = table.clip(0, 255).astype(np.uint8)
    out = table[img]
    return out
def color_func(img, factor=5):
    """Adjust colour saturation by blending the image with its grayscale.

    Intended to give the same output as ``PIL.ImageEnhance.Color``:
    ``factor == 0`` yields a gray image and ``factor == 1`` approximately
    the input.

    Args:
        img: Array whose last axis holds 3 channels. The luma weights are
            ordered 0.114 / 0.587 / 0.299, which suggests BGR channel order
            -- confirm with the caller.
        factor: Blend factor applied to the colour component.

    Returns:
        A uint8 array with the same shape as ``img``.
    """
    # Closed form of ``eye(3) * factor + w * (1 - factor)`` with the luma
    # weights ``w`` as a column: one matmul replaces an explicit
    # grayscale conversion followed by a blend.
    M = (
        np.float32([
            [0.886, -0.114, -0.114],
            [-0.587, 0.413, -0.587],
            [-0.299, -0.299, 0.701],
        ]) * factor
        + np.float32([[0.114], [0.587], [0.299]])
    )
    out = np.matmul(img, M).clip(0, 255).astype(np.uint8)
    return out
def contrast_func(img, factor=5):
    """Scale every pixel's distance from the image's mean luminance.

    Intended to give the same output as ``PIL.ImageEnhance.Contrast``.

    Args:
        img: Integer array with 3 channels on the last axis; values index a
            256-entry lookup table. The luma weights are ordered
            0.114 / 0.587 / 0.299, which suggests BGR channel order --
            confirm with the caller.
        factor: Contrast multiplier; 0 collapses the image to its mean
            luminance.

    Returns:
        A uint8 array with the same shape as ``img``.
    """
    luma_weights = np.array([0.114, 0.587, 0.299])
    # Weighted sum of the per-channel means gives one scalar gray level.
    mean = np.sum(np.mean(img, axis=(0, 1)) * luma_weights)
    table = ((np.arange(256) - mean) * factor + mean).clip(0, 255).astype(np.uint8)
    out = table[img]
    return out
def brightness_func(img, factor=2):
    """Multiply every pixel value by ``factor``, saturating at 0 and 255.

    Intended to give the same output as ``PIL.ImageEnhance.Brightness``
    (the previous docstring named ``Contrast`` by mistake).

    Args:
        img: Integer array whose values index a 256-entry lookup table.
        factor: Brightness multiplier.

    Returns:
        A uint8 array with the same shape as ``img``.
    """
    table = (np.arange(256, dtype=np.float32) * factor).clip(0, 255).astype(np.uint8)
    out = table[img]
    return out
def sharpness_func(img, factor=2):
    """Sharpen or blur by extrapolating between a smoothed copy and ``img``.

    According to the original author, the result differs from PIL only on
    the four image borders; the interior matches.

    Args:
        img: 3-D image array (the interior is sliced as ``[1:-1, 1:-1, :]``).
        factor: 0 returns the smoothed image, 1 returns ``img`` itself,
            other values blend or extrapolate between the two.

    Returns:
        The adjusted image. For factors other than 0 and 1 it is a new
        uint8 array; border pixels are copied from ``img`` unchanged.
    """
    # 3x3 smoothing kernel: centre weight 5, neighbours 1, normalised by 13.
    kernel = np.ones((3, 3), dtype=np.float32)
    kernel[1][1] = 5
    kernel /= 13
    degenerate = cv2.filter2D(img, -1, kernel)
    if factor == 0.0:
        return degenerate
    if factor == 1.0:
        return img
    out = img.astype(np.float32)
    # Only the interior is blended; the one-pixel border keeps its value.
    degenerate = degenerate.astype(np.float32)[1:-1, 1:-1, :]
    out[1:-1, 1:-1, :] = degenerate + factor * (out[1:-1, 1:-1, :] - degenerate)
    # Extrapolation can leave [0, 255]; clip so the uint8 cast saturates
    # instead of wrapping around.
    out = out.clip(0, 255).astype(np.uint8)
    return out
def shear_x_func(img, factor, fill=(0, 0, 0)):
    """Shear ``img`` horizontally, keeping the original canvas size.

    Args:
        img: Image array; ``shape[0]`` and ``shape[1]`` are read as height
            and width.
        factor: Shear coefficient placed in the x-row of the affine matrix.
        fill: Border value used for pixels that have no source pixel.

    Returns:
        The warped image cast to uint8.
    """
    H, W = img.shape[0], img.shape[1]
    M = np.float32([[1, factor, 0], [0, 1, 0]])
    out = cv2.warpAffine(
        img, M, (W, H), borderValue=fill, flags=cv2.INTER_LINEAR
    ).astype(np.uint8)
    return out
def translate_x_func(img, offt=10, fill=(0, 0, 0)):
    """Translate ``img`` horizontally, keeping the original canvas size.

    The original docstring is truncated in this copy; it claims parity with
    a PIL transform, presumably ``PIL.Image.transform`` -- verify.

    Args:
        img: Image array; ``shape[0]`` and ``shape[1]`` are read as height
            and width.
        offt: Offset whose negation is used as the x-translation term of
            the affine matrix.
        fill: Border value used for pixels that have no source pixel.

    Returns:
        The warped image cast to uint8.
    """
    H, W = img.shape[0], img.shape[1]
    M = np.float32([[1, 0, -offt], [0, 1, 0]])
    out = cv2.warpAffine(
        img, M, (W, H), borderValue=fill, flags=cv2.INTER_LINEAR
    ).astype(np.uint8)
    return out
def translate_y_func(img, offt, fill=(0, 0, 0)):
    """Translate ``img`` vertically, keeping the original canvas size.

    The original docstring is truncated in this copy; it claims parity with
    a PIL transform, presumably ``PIL.Image.transform`` -- verify.

    Args:
        img: Image array; ``shape[0]`` and ``shape[1]`` are read as height
            and width.
        offt: Offset whose negation is used as the y-translation term of
            the affine matrix.
        fill: Border value used for pixels that have no source pixel.

    Returns:
        The warped image cast to uint8.
    """
    H, W = img.shape[0], img.shape[1]
    M = np.float32([[1, 0, 0], [0, 1, -offt]])
    out = cv2.warpAffine(
        img, M, (W, H), borderValue=fill, flags=cv2.INTER_LINEAR
    ).astype(np.uint8)
    return out
def posterize_func(img, bits):
    """Keep only the ``bits`` most significant bits of every pixel.

    Intended to give the same output as ``PIL.ImageOps.posterize``.

    Args:
        img: Integer array (uint8 in the tests) to be masked.
        bits: Number of high-order bits to keep, 0-8.

    Returns:
        ``img`` bitwise-ANDed with the mask.
    """
    # Truncate the shifted mask to 8 bits before building the uint8 scalar:
    # ``np.uint8(255 << k)`` overflows and raises on NumPy 2.
    mask = (0xFF << (8 - bits)) & 0xFF
    out = np.bitwise_and(img, np.uint8(mask))
    return out
def shear_y_func(img, factor, fill=(0, 0, 0)):
    """Shear ``img`` vertically, keeping the original canvas size.

    Args:
        img: Image array; ``shape[0]`` and ``shape[1]`` are read as height
            and width.
        factor: Shear coefficient placed in the y-row of the affine matrix.
        fill: Border value used for pixels that have no source pixel.

    Returns:
        The warped image cast to uint8.
    """
    H, W = img.shape[0], img.shape[1]
    M = np.float32([[1, 0, 0], [factor, 1, 0]])
    out = cv2.warpAffine(
        img, M, (W, H), borderValue=fill, flags=cv2.INTER_LINEAR
    ).astype(np.uint8)
    return out
def cutout_func(img, pad_size, replace=(0, 0, 0)):
    """Overwrite a randomly placed square patch of ``img`` with ``replace``.

    The patch centre is drawn uniformly with ``np.random.random``; the patch
    is clipped at the image borders, so it may be smaller than requested.

    Args:
        img: 3-D image array (the patch is indexed as ``[rows, cols, :]``).
        pad_size: Side length of the patch; half of it (integer division)
            is taken on each side of the centre.
        replace: Per-channel fill value, converted to uint8.

    Returns:
        A copy of ``img`` with the patch filled in; ``img`` is not modified.
    """
    replace = np.array(replace, dtype=np.uint8)
    H, W = img.shape[0], img.shape[1]
    rh, rw = np.random.random(2)
    half = pad_size // 2
    center_row, center_col = int(rh * H), int(rw * W)
    row_lo, row_hi = max(center_row - half, 0), min(center_row + half, H)
    col_lo, col_hi = max(center_col - half, 0), min(center_col + half, W)
    out = img.copy()
    out[row_lo:row_hi, col_lo:col_hi, :] = replace
    return out
### level to args
def enhance_level_to_args(MAX_LEVEL):
    """Build a converter from a magnitude level to an enhancement factor.

    Args:
        MAX_LEVEL: Level that corresponds to the maximum magnitude.

    Returns:
        A function mapping ``level`` to the 1-tuple
        ``((level / MAX_LEVEL) * 1.8 + 0.1,)``, i.e. a factor between 0.1
        and 1.9 for levels in ``[0, MAX_LEVEL]``.
    """
    def level_to_args(level):
        return ((level / MAX_LEVEL) * 1.8 + 0.1,)

    return level_to_args
def shear_level_to_args(MAX_LEVEL, replace_value):
    """Build a converter from a magnitude level to shear arguments.

    Args:
        MAX_LEVEL: Level that corresponds to the maximum magnitude.
        replace_value: Fill value passed through unchanged as the second
            element of the result.

    Returns:
        A function mapping ``level`` to ``(shear, replace_value)`` where
        ``shear`` is ``(level / MAX_LEVEL) * 0.3`` with a random sign.
    """
    def level_to_args(level):
        level = (level / MAX_LEVEL) * 0.3
        # Flip the direction with probability one half.
        if np.random.random() > 0.5:
            level = -level
        return (level, replace_value)

    return level_to_args
def translate_level_to_args(translate_const, MAX_LEVEL, replace_value):
    """Build a converter from a magnitude level to translation arguments.

    Args:
        translate_const: Offset that corresponds to ``MAX_LEVEL``; converted
            with ``float``.
        MAX_LEVEL: Level that corresponds to the maximum magnitude.
        replace_value: Fill value passed through unchanged as the second
            element of the result.

    Returns:
        A function mapping ``level`` to ``(offset, replace_value)`` where
        ``offset`` is ``(level / MAX_LEVEL) * translate_const`` with a
        random sign.
    """
    def level_to_args(level):
        level = (level / MAX_LEVEL) * float(translate_const)
        # Flip the direction with probability one half.
        if np.random.random() > 0.5:
            level = -level
        return (level, replace_value)

    return level_to_args
def cutout_level_to_args(cutout_const, MAX_LEVEL, replace_value):
def level_to_args(level):
山的诗句level =int((level / MAX_LEVEL)* cutout_const)
return(level, replace_value)