Official PyTorch ResNet code, annotated
import torch
import torch.nn as nn
#from .utils import load_state_dict_from_url
# torch.nn provides the basic building blocks; load_state_dict_from_url (the old model_zoo) downloads the pretrained weights.
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
           'wide_resnet50_2', 'wide_resnet101_2']
# A key ResNet design principle: whenever the feature map size is halved, the number of feature maps is doubled, which keeps the per-layer complexity roughly constant.
model_urls = {
    'resnet18': '/models/resnet18-5c106cde.pth',
    'resnet34': '/models/resnet34-333f7ec4.pth',
    'resnet50': '/models/resnet50-19c8e357.pth',
    'resnet101': '/models/resnet101-5d3b4d8f.pth',
    'resnet152': '/models/resnet152-b121ed2d.pth',
    'resnext50_32x4d': '/models/resnext50_32x4d-7cdf4587.pth',
    'resnext101_32x8d': '/models/resnext101_32x8d-8ba56ff5.pth',
    'wide_resnet50_2': '/models/wide_resnet50_2-95faca4d.pth',
    'wide_resnet101_2': '/models/wide_resnet101_2-32ee1156.pth',
}
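To see this design principle concretely, here is a small illustrative sketch (my addition, assuming the full file below has been run): it pushes a dummy input through the four stages of the resnet18 configuration and prints the shapes. The spatial size halves at each stage while the channel count doubles.

import torch

# ResNet and BasicBlock are defined later in this file; [2, 2, 2, 2] is the resnet18 config
model = ResNet(BasicBlock, [2, 2, 2, 2])
x = torch.randn(1, 3, 224, 224)
x = model.maxpool(model.relu(model.bn1(model.conv1(x))))  # stem: 224 -> 56
for name in ['layer1', 'layer2', 'layer3', 'layer4']:
    x = getattr(model, name)(x)
    print(name, tuple(x.shape))
# layer1 (1, 64, 56, 56)
# layer2 (1, 128, 28, 28)
# layer3 (1, 256, 14, 14)
# layer4 (1, 512, 7, 7)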
# groups controls the connections between input and output: with groups=1 every output channel
# is a convolution over all input channels; with groups=2 there are effectively two side-by-side
# conv layers, each seeing half of the input channels and producing half of the output channels.
# Here groups=1, i.e. no grouping. dilation is reused as the padding so the spatial size is
# preserved. (In the Bottleneck below, the 3x3 conv does the actual high-dimensional feature
# extraction, while the 1x1 convs only change the channel dimension.)
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False, dilation=dilation)
def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
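A quick sanity check on these helpers (a sketch of mine, not part of the original file): conv3x3 preserves the spatial size because padding equals dilation, and groups splits the channels, dividing the weight count by the number of groups.

import torch

x = torch.randn(1, 64, 56, 56)
print(conv3x3(64, 128)(x).shape)            # torch.Size([1, 128, 56, 56]), size preserved
print(conv3x3(64, 128, stride=2)(x).shape)  # torch.Size([1, 128, 28, 28]), halved by stride

# groups=2 behaves like two parallel convs on half the channels each,
# so it holds half as many weights as the groups=1 version
full = conv3x3(64, 128, groups=1)
grouped = conv3x3(64, 128, groups=2)
print(full.weight.numel(), grouped.weight.numel())  # 73728 36864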
# Note: in conv3x3/conv1x1 above, bias is set to False. The reason: Batch Normalization follows
# each conv, and BN subtracts the per-channel mean, so any constant bias would be cancelled out.
# Since BN's own shift parameter can play the same role, the conv bias is redundant whenever BN
# is used; dropping it saves parameters and work during gradient descent.
# BasicBlock is designed for resnet18/34; these shallower architectures can do without Bottleneck.
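A minimal sketch (my addition) of why the bias is redundant: BN normalizes each channel with the batch statistics, so adding a constant per-channel offset before BN, which is exactly what a conv bias would do, leaves the output unchanged.

import torch
import torch.nn as nn

bn = nn.BatchNorm2d(8)
x = torch.randn(4, 8, 16, 16)
bias = torch.randn(1, 8, 1, 1)  # a constant per-channel offset, like a conv bias

out1 = bn(x)
out2 = bn(x + bias)  # BN subtracts the per-channel mean, so the offset cancels
print(torch.allclose(out1, out2, atol=1e-5))  # True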
class BasicBlock(nn.Module):
    expansion = 1
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d  # BatchNorm2d is standard in conv nets (it counteracts vanishing/exploding gradients); its argument is the conv's output channel count
            # it computes the per-channel mean and variance and normalizes the activations
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')  # why these limits? only Bottleneck implements the groups/base_width scaling
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)  # planes is the output channel count of conv1; BN is parameterized per output channel
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample  # downsampling branch for the shortcut
        self.stride = stride
    # Summary: this block is two 3x3 convs, each followed by BN, with a ReLU after the first and another after the addition. downsample exists because of the out + x addition: the two tensors must have the same shape, so the original input x may need to be projected.
    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)  # first conv, BN, activation
        out = self.conv2(out)
        out = self.bn2(out)  # second conv, BN (no ReLU until after the addition)
        if self.downsample is not None:  # when the dimensions differ, a 1x1 conv maps the input to the required channels so the addition works
            identity = self.downsample(x)  # i.e. downsample the shortcut if needed
        out += identity  # the residual connection: H(x) = F(x) + x, or H(x) = F(x) + Wx when projected
        out = self.relu(out)
        return out
# Note self.downsample = downsample: by default downsample=None, i.e. no downsampling, but it is needed whenever the BasicBlock's shortcut x has a different shape from the output. Spoiler: in ResNet the downsample is a 1x1 conv that maps x to the desired channel count. Why? Because x must be added to the output at the end, and tensors with different channel counts cannot be added.
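A small usage sketch (my own example, not in the original post): a stride-2 BasicBlock changes both the channel count and the spatial size, so the shortcut needs the 1x1 downsample described above for the addition to be valid.

import torch
import torch.nn as nn

# the shortcut must also go 64 -> 128 channels at stride 2 to match the main branch
shortcut = nn.Sequential(conv1x1(64, 128, stride=2), nn.BatchNorm2d(128))
block = BasicBlock(64, 128, stride=2, downsample=shortcut)

x = torch.randn(1, 64, 56, 56)
print(block(x).shape)  # torch.Size([1, 128, 28, 28])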
class Bottleneck(nn.Module):
    # expansion multiplies the output channel count; in the basic version, BasicBlock,
    # expansion is 1, i.e. no multiplication, and the output channel count equals planes.
    expansion = 4  # each Bottleneck expands its final output to four times planes
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups  # the 3x3 conv's width; it varies with base_width and groups (see ResNeXt below)
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
    def forward(self, x):
        identity = x  # the shortcut
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)  # 1x1 conv squeezes the channels down to width
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)  # 3x3 conv does the feature extraction
        out = self.conv3(out)
        out = self.bn3(out)  # 1x1 conv expands the channels again, then BN
        # In both BasicBlock and Bottleneck the final step checks whether x needs a downsample:
        # its channel count must match the main branch's output before the two can be added.
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        out = self.relu(out)
        return out
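Another illustrative sketch (mine): with planes=64 a Bottleneck emits 64 * expansion = 256 channels, so even at stride 1 the very first block of a stage needs a 1x1 projection on the shortcut.

import torch
import torch.nn as nn

shortcut = nn.Sequential(conv1x1(64, 256), nn.BatchNorm2d(256))
block = Bottleneck(64, 64, downsample=shortcut)  # output channels = 64 * 4 = 256

x = torch.randn(1, 64, 56, 56)
print(block(x).shape)  # torch.Size([1, 256, 56, 56])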
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer  # stored on self because _make_layer needs norm_layer too
        self.inplanes = 64  # the default input channel count of the first stage
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)  # 7x7 conv: 3 input channels, inplanes outputs, stride 2, padding 3, bias False
        self.bn1 = norm_layer(self.inplanes)  # normalization keeps the activations well-scaled
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # 3x3 max pooling, stride 2, padding 1
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        # Initialize the conv and BN layers, as also mentioned in the paper
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to /abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)
    # _make_layer's first argument, block, selects between the BasicBlock and Bottleneck classes;
    # the second, planes, is the stage's base channel count (NOT the output channel count: the
    # output is planes * block.expansion); the third, blocks, is how many residual blocks the
    # stage contains.
    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            # when stride != 1 or the channel counts do not match, build the downsample branch:
            # again a 1x1 conv (wrapped in a Sequential container) that projects the shortcut
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )
        layers = []  # e.g. layers=[3, 4, 6, 3] builds stages of 3, 4, 6 and 3 Bottlenecks in turn
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))  # the stage's first residual block goes into the list
        # the first block is handled separately because each stage halves the output size:
        # only the first block gets stride=2 (and the downsample); the rest keep the size
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):  # append the remaining residual blocks, completing the stage
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))
        return nn.Sequential(*layers)
    # ResNet has five stages. The first is a 7x7 conv with stride=2 and padding=3, followed by
    # BN, ReLU and max pooling, after which the feature map is already 1/4 of the input size.
    # The next four stages are layer1..layer4, each produced by _make_layer; the caller supplies
    # the number of blocks per stage via the layers list (e.g. [3, 4, 6, 3]).
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)  # stage one: a plain conv plus pooling shrinks the input to 1/4
        # layer1..layer4 all come from _make_layer with different arguments; its blocks argument
        # is layers[0..3], i.e. the per-stage block counts such as [3, 4, 6, 3]
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x
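To tie the pieces together, here is an illustrative sketch (my addition) that builds the resnet18 configuration directly from the classes above, checks the output shape, and shows what zero_init_residual does:

import torch

model = ResNet(BasicBlock, [2, 2, 2, 2], zero_init_residual=True)  # resnet18 layout
x = torch.randn(2, 3, 224, 224)
print(model(x).shape)  # torch.Size([2, 1000])

# each stage holds the requested number of blocks...
print([len(getattr(model, n)) for n in ['layer1', 'layer2', 'layer3', 'layer4']])  # [2, 2, 2, 2]
# ...and zero_init_residual zeroed the last BN of every block, so each block starts as an identity
print(model.layer1[0].bn2.weight.abs().sum().item())  # 0.0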
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch],
                                              progress=progress)
        model.load_state_dict(state_dict)
    return model
def resnet18(pretrained=False, progress=True, **kwargs):
    r"""ResNet-18 model from
    `"Deep Residual Learning for Image Recognition" </pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
                   **kwargs)
def resnet34(pretrained=False, progress=True, **kwargs):
    r"""ResNet-34 model from
    `"Deep Residual Learning for Image Recognition" </pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
                   **kwargs)
def resnet50(pretrained=False, progress=True, **kwargs):
    r"""ResNet-50 model from
    `"Deep Residual Learning for Image Recognition" </pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
                   **kwargs)
def resnet101(pretrained=False, progress=True, **kwargs):
    r"""ResNet-101 model from
    `"Deep Residual Learning for Image Recognition" </pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
                   **kwargs)
def resnet152(pretrained=False, progress=True, **kwargs):
    r"""ResNet-152 model from
    `"Deep Residual Learning for Image Recognition" </pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
                   **kwargs)
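As a sanity check on the naming (my own arithmetic, not from the original post): the depth counts every conv plus the final fc layer. Each Bottleneck contains 3 convs, and on top of the four stages there are the stem conv and the fc:

blocks = [3, 8, 36, 3]       # Bottleneck counts per stage for resnet152
depth = 3 * sum(blocks) + 2  # 3 convs per Bottleneck + stem conv + fc
print(depth)                 # 152 (the same formula gives 50 for [3, 4, 6, 3])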
def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
    r"""ResNeXt-50 32x4d model from
    `"Aggregated Residual Transformation for Deep Neural Networks" </pdf/1611.05431.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 4
    return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
                   pretrained, progress, **kwargs)
def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
    r"""ResNeXt-101 32x8d model from
    `"Aggregated Residual Transformation for Deep Neural Networks" </pdf/1611.05431.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 8
    return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
                   pretrained, progress, **kwargs)
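Finally, a quick illustration (my addition) of how the Bottleneck width formula behaves for ResNeXt: the "32x4d" in the name means groups=32 and width_per_group=4, so in the first stage (planes=64) the 3x3 conv runs 32 groups of 4 channels each:

planes, base_width, groups = 64, 4, 32  # first stage of resnext50_32x4d
width = int(planes * (base_width / 64.)) * groups
print(width)                            # 128 = 32 groups x 4 channels per group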