最后,需要考虑的一组参数是每个增强失真(augmentation distortion)的大小。
研究人员假设,一个单一的全局失真 M(global distortion M)可能就足以对所有转换进行参数化。
可以使用标准方法高效地进行超参数优化,但是考虑到极小的搜索空间,研究人员发现朴素网格搜索(naive grid search)是非常有效的。实验结果
import cv2
import numpy as np
## aug functions
def identity_func(img):
    """Return ``img`` unchanged (the no-op augmentation).

    The very same object is handed back; no copy is made.
    """
    return img
def autocontrast_func(img, cutoff=2):
    """Stretch every channel so its used intensity range spans 0..255.

    Intended to give the same output as ``PIL.ImageOps.autocontrast``.

    Args:
        img: Image array. It is split with ``cv2.split`` and each channel is
            used to index a 256-entry lookup table, so channels are
            presumably ``uint8`` -- TODO confirm against callers.
        cutoff: Percentage of the channel's pixels to skip at each end of the
            histogram when picking the low/high anchor values.

    Returns:
        The remapped channels merged back into one image with ``cv2.merge``.
    """
    n_bins = 256

    def tune_channel(ch):
        """Build and apply the stretch lookup table for one channel."""
        n = ch.size
        cut = cutoff * n // 100
        if cut == 0:
            # Convert to Python ints: ``-low`` on a numpy uint8 scalar wraps
            # around (e.g. -10 -> 246) and would corrupt the offset below.
            high, low = int(ch.max()), int(ch.min())
        else:
            hist = cv2.calcHist([ch], [0], None, [n_bins], [0, n_bins])
            # First bin (from the dark end) whose cumulative count exceeds cut.
            above = np.flatnonzero(np.cumsum(hist) > cut)
            low = int(above[0]) if above.size else 0
            # Same search from the bright end, mapped back to a bin index.
            above = np.flatnonzero(np.cumsum(hist[::-1]) > cut)
            high = n_bins - 1 - int(above[0]) if above.size else n_bins - 1

        if high <= low:
            # Degenerate range: leave the channel as it is.
            table = np.arange(n_bins)
        else:
            scale = (n_bins - 1) / (high - low)
            offt = -low * scale
            table = np.arange(n_bins) * scale + offt
        table = table.clip(0, 255).astype(np.uint8)
        return table[ch]

    channels = [tune_channel(ch) for ch in cv2.split(img)]
    # cv2.merge is the inverse of the cv2.split above.
    out = cv2.merge(channels)
    return out
def equalize_func(img):
    """Equalize the histogram of each channel independently.

    Intended to give the same output as ``PIL.ImageOps.equalize``; per the
    original author's note, PIL's algorithm differs from OpenCV's own
    equalization, which is why the lookup table is built by hand here.

    Args:
        img: Image array. Each channel returned by ``cv2.split`` is used to
            index a 256-entry table, so channels are presumably ``uint8`` --
            TODO confirm against callers.

    Returns:
        The equalized channels merged back into one image with ``cv2.merge``.
    """
    n_bins = 256

    def tune_channel(ch):
        """Build and apply the equalization lookup table for one channel."""
        hist = cv2.calcHist([ch], [0], None, [n_bins], [0, n_bins])
        non_zero_hist = hist[hist != 0].reshape(-1)
        # Pixels per output level, ignoring the last non-empty bin.
        step = np.sum(non_zero_hist[:-1]) // (n_bins - 1)
        if step == 0:
            # Nothing to spread out; return the channel untouched.
            return ch
        # Histogram shifted by one bin, seeded with half a step.
        n = np.empty_like(hist)
        n[0] = step // 2
        n[1:] = hist[:-1]
        table = (np.cumsum(n) // step).clip(0, 255).astype(np.uint8)
        return table[ch]

    channels = [tune_channel(ch) for ch in cv2.split(img)]
    # cv2.merge is the inverse of the cv2.split above.
    out = cv2.merge(channels)
    return out
def rotate_func(img, degree, fill=(0, 0, 0)):
    """Rotate ``img`` about its centre, keeping the original canvas size.

    Like PIL, the angle is given in degrees, not radians.

    Args:
        img: Image array whose first two axes are (height, width).
        degree: Rotation angle in degrees, passed to
            ``cv2.getRotationMatrix2D``.
        fill: Border value used for pixels that fall outside the source.

    Returns:
        The rotated image produced by ``cv2.warpAffine``.
    """
    height, width = img.shape[0], img.shape[1]
    center = width / 2, height / 2
    # Scale factor 1: pure rotation, no zoom.
    matrix = cv2.getRotationMatrix2D(center, degree, 1)
    return cv2.warpAffine(img, matrix, (width, height), borderValue=fill)
def solarize_func(img, thresh=128):
    """Invert every pixel value that is at or above ``thresh``.

    Intended to give the same output as ``PIL.ImageOps.solarize``.

    Args:
        img: Integer image array with values in 0..255; it is used to index a
            256-entry lookup table.
        thresh: Values strictly below this are kept; all others become
            ``255 - value``.

    Returns:
        A ``uint8`` array with the same shape as ``img``.
    """
    values = np.arange(256)
    table = np.where(values < thresh, values, 255 - values)
    table = table.clip(0, 255).astype(np.uint8)
    return table[img]
def color_func(img, factor=5):
    """Adjust colour saturation by blending the image with its grayscale.

    Intended to give the same output as ``PIL.ImageEnhance.Color``.

    Args:
        img: Image array whose last axis holds 3 channels. The gray weights
            are applied in the order 0.114, 0.587, 0.299, which matches the
            BGR layout of the ``cv2.COLOR_BGR2GRAY`` conversion this was
            derived from.
        factor: Blend factor; 0 yields the gray value in every channel and
            larger values push each channel away from gray.

    Returns:
        A ``uint8`` array with the same shape as ``img``.
    """
    # Blending a pixel with its gray value is linear in the pixel, so the
    # whole operation is one 3x3 matrix (w = column of gray weights):
    #   M = I * factor + w * (1 - factor) = (I - w) * factor + w
    # The literal below is (I - w), written out to avoid the slower
    # per-pixel grayscale-and-blend formulation.
    M = (
        np.float32([
            [0.886, -0.114, -0.114],
            [-0.587, 0.413, -0.587],
            [-0.299, -0.299, 0.701],
        ]) * factor
        + np.float32([[0.114], [0.587], [0.299]])
    )
    out = np.matmul(img, M).clip(0, 255).astype(np.uint8)
    return out
def contrast_func(img, factor=5):
    """Scale every pixel's distance from the image's mean gray level.

    Intended to give the same output as ``PIL.ImageEnhance.Contrast``.

    Args:
        img: Integer image array with values in 0..255 and 3 channels on the
            last axis (the mean is taken over the first two axes and weighted
            per channel with 0.114, 0.587, 0.299).
        factor: Contrast factor; 0 maps every pixel to the mean gray level.

    Returns:
        A ``uint8`` array with the same shape as ``img``.
    """
    mean = np.sum(np.mean(img, axis=(0, 1)) * np.array([0.114, 0.587, 0.299]))
    # Lookup table: new = (old - mean) * factor + mean, clipped and cast to
    # uint8 like the other lookup tables in this module.
    table = ((np.arange(256) - mean) * factor + mean).clip(0, 255).astype(np.uint8)
    return table[img]
def brightness_func(img, factor=2):
    """Multiply every pixel value by ``factor``, saturating at 255.

    Intended to give the same output as ``PIL.ImageEnhance.Brightness``.

    Args:
        img: Integer image array with values in 0..255; it is used to index a
            256-entry lookup table.
        factor: Brightness multiplier; 0 yields an all-zero image.

    Returns:
        A ``uint8`` array with the same shape as ``img``.
    """
    table = (np.arange(256, dtype=np.float32) * factor).clip(0, 255).astype(np.uint8)
    return table[img]
def sharpness_func(img, factor=2):
    """Sharpen or blur ``img`` by blending it with a smoothed copy.

    Per the original author's note, the result differs from PIL only on the
    four image borders; the interior is the same.

    Args:
        img: Image array with 3 axes (height, width, channels).
        factor: 0 returns the smoothed image, 1 returns ``img`` itself, other
            values interpolate or extrapolate between the two.

    Returns:
        The smoothed image for ``factor == 0``, ``img`` itself for
        ``factor == 1``, otherwise a new ``uint8`` array.
    """
    # 3x3 smoothing kernel with centre weight 5; the weights sum to 13, so
    # dividing by 13 keeps overall brightness unchanged.
    kernel = np.ones((3, 3), dtype=np.float32)
    kernel[1, 1] = 5
    kernel /= 13
    degenerate = cv2.filter2D(img, -1, kernel)

    if factor == 0.0:
        return degenerate
    if factor == 1.0:
        return img

    out = img.astype(np.float32)
    # Only the interior is blended; the one-pixel border keeps the input.
    degenerate = degenerate.astype(np.float32)[1:-1, 1:-1, :]
    out[1:-1, 1:-1, :] = degenerate + factor * (out[1:-1, 1:-1, :] - degenerate)
    # Clip before the cast: for factor > 1 the blend can leave 0..255, and an
    # unclipped uint8 cast would wrap around instead of saturating.
    return out.clip(0, 255).astype(np.uint8)
def shear_x_func(img, factor, fill=(0, 0, 0)):
    """Shear ``img`` horizontally, keeping the original canvas size.

    Args:
        img: Image array whose first two axes are (height, width).
        factor: Shear coefficient; the affine map is ``x' = x + factor * y``.
        fill: Border value used for pixels that fall outside the source.

    Returns:
        The sheared image as a ``uint8`` array.
    """
    height, width = img.shape[0], img.shape[1]
    matrix = np.float32([[1, factor, 0], [0, 1, 0]])
    warped = cv2.warpAffine(
        img, matrix, (width, height), borderValue=fill, flags=cv2.INTER_LINEAR
    )
    return warped.astype(np.uint8)
def translate_x_func(img, offt=10, fill=(0, 0, 0)):
    """Translate ``img`` horizontally, keeping the original canvas size.

    NOTE(review): the original note here was truncated; it presumably read
    "same output as PIL.Image.transform" -- confirm.

    Args:
        img: Image array whose first two axes are (height, width).
        offt: Offset in pixels; the affine map is ``x' = x - offt``.
        fill: Border value used for the area uncovered by the shift.

    Returns:
        The translated image as a ``uint8`` array.
    """
    height, width = img.shape[0], img.shape[1]
    matrix = np.float32([[1, 0, -offt], [0, 1, 0]])
    warped = cv2.warpAffine(
        img, matrix, (width, height), borderValue=fill, flags=cv2.INTER_LINEAR
    )
    return warped.astype(np.uint8)
def translate_y_func(img, offt, fill=(0, 0, 0)):
    """Translate ``img`` vertically, keeping the original canvas size.

    NOTE(review): the original note here was truncated; it presumably read
    "same output as PIL.Image.transform" -- confirm.

    Args:
        img: Image array whose first two axes are (height, width).
        offt: Offset in pixels; the affine map is ``y' = y - offt``.
        fill: Border value used for the area uncovered by the shift.

    Returns:
        The translated image as a ``uint8`` array.
    """
    height, width = img.shape[0], img.shape[1]
    matrix = np.float32([[1, 0, 0], [0, 1, -offt]])
    warped = cv2.warpAffine(
        img, matrix, (width, height), borderValue=fill, flags=cv2.INTER_LINEAR
    )
    return warped.astype(np.uint8)
def posterize_func(img, bits):
    """Keep only the ``bits`` most significant bits of every pixel value.

    Intended to give the same output as ``PIL.ImageOps.posterize``.

    Args:
        img: ``uint8`` image array.
        bits: Number of high bits to keep; 8 leaves the image unchanged and
            0 clears it. Must be an integer no greater than 8.

    Returns:
        The masked image, same shape as ``img``.
    """
    # Reduce the shifted mask to 8 bits before building the uint8 scalar:
    # 255 << k exceeds the uint8 range for any k > 0, which recent NumPy
    # versions reject instead of silently wrapping.
    mask = (0xFF << (8 - bits)) & 0xFF
    return np.bitwise_and(img, np.uint8(mask))
def shear_y_func(img, factor, fill=(0, 0, 0)):
    """Shear ``img`` vertically, keeping the original canvas size.

    Args:
        img: Image array whose first two axes are (height, width).
        factor: Shear coefficient; the affine map is ``y' = y + factor * x``.
        fill: Border value used for pixels that fall outside the source.

    Returns:
        The sheared image as a ``uint8`` array.
    """
    height, width = img.shape[0], img.shape[1]
    matrix = np.float32([[1, 0, 0], [factor, 1, 0]])
    warped = cv2.warpAffine(
        img, matrix, (width, height), borderValue=fill, flags=cv2.INTER_LINEAR
    )
    return warped.astype(np.uint8)
def cutout_func(img, pad_size, replace=(0, 0, 0)):
    """Overwrite a randomly placed rectangle of ``img`` with ``replace``.

    The rectangle is centred on a point drawn uniformly from the image via
    ``np.random.random`` and clipped to the image bounds, so it can be
    smaller than requested near an edge.

    Args:
        img: Image array with 3 axes (height, width, channels).
        pad_size: Requested side length of the rectangle in pixels; half of
            it (floor division) is applied on each side of the centre.
        replace: Per-channel fill value, converted to ``uint8``.

    Returns:
        A modified copy of ``img``; the input array is left untouched.
    """
    replace = np.array(replace, dtype=np.uint8)
    height, width = img.shape[0], img.shape[1]
    rand_row, rand_col = np.random.random(2)
    half = pad_size // 2
    center_row, center_col = int(rand_row * height), int(rand_col * width)
    row_lo, row_hi = max(center_row - half, 0), min(center_row + half, height)
    col_lo, col_hi = max(center_col - half, 0), min(center_col + half, width)
    # Work on a copy so the caller's image is never mutated.
    out = img.copy()
    out[row_lo:row_hi, col_lo:col_hi, :] = replace
    return out
### level to args
def enhance_level_to_args(MAX_LEVEL):
    """Build a converter from a magnitude level to an enhancement factor.

    Args:
        MAX_LEVEL: Level that corresponds to the strongest setting.

    Returns:
        A function mapping ``level`` to the one-element tuple
        ``((level / MAX_LEVEL) * 1.8 + 0.1,)``, i.e. a factor from 0.1 at
        level 0 up to 1.9 at ``MAX_LEVEL``.
    """
    def level_to_args(level):
        return ((level / MAX_LEVEL) * 1.8 + 0.1,)

    return level_to_args
def shear_level_to_args(MAX_LEVEL, replace_value):
    """Build a converter from a magnitude level to shear arguments.

    Args:
        MAX_LEVEL: Level that corresponds to the strongest setting.
        replace_value: Fill value passed through unchanged as the second
            element of the returned tuple.

    Returns:
        A function mapping ``level`` to ``(shear, replace_value)``, where
        ``shear`` has magnitude ``(level / MAX_LEVEL) * 0.3`` and its sign is
        flipped at random using ``np.random.random``.
    """
    def level_to_args(level):
        level = (level / MAX_LEVEL) * 0.3
        # Flip the direction about half of the time.
        if np.random.random() > 0.5:
            level = -level
        return (level, replace_value)

    return level_to_args
def translate_level_to_args(translate_const, MAX_LEVEL, replace_value):
    """Build a converter from a magnitude level to translation arguments.

    Args:
        translate_const: Offset that corresponds to ``MAX_LEVEL``; it is
            converted with ``float``.
        MAX_LEVEL: Level that corresponds to the strongest setting.
        replace_value: Fill value passed through unchanged as the second
            element of the returned tuple.

    Returns:
        A function mapping ``level`` to ``(offset, replace_value)``, where
        ``offset`` has magnitude ``(level / MAX_LEVEL) * translate_const``
        and its sign is flipped at random using ``np.random.random``.
    """
    def level_to_args(level):
        level = (level / MAX_LEVEL) * float(translate_const)
        # Flip the direction about half of the time.
        if np.random.random() > 0.5:
            level = -level
        return (level, replace_value)

    return level_to_args
def cutout_level_to_args(cutout_const, MAX_LEVEL, replace_value):
def level_to_args(level):
level =int((level / MAX_LEVEL)* cutout_const)
return(level, replace_value)