Official PyTorch ResNet code, annotated
import torch
import torch.nn as nn
#from .utils import load_state_dict_from_url
# torch.nn provides the basic building blocks; load_state_dict_from_url (the old model_zoo) downloads the pretrained weights.
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
           'wide_resnet50_2', 'wide_resnet101_2']
# A key ResNet design principle: whenever the feature map size is halved, the number of feature maps is doubled, which keeps the per-layer complexity roughly constant.
model_urls = {
    'resnet18': '/models/resnet18-5c106cde.pth',
    'resnet34': '/models/resnet34-333f7ec4.pth',
    'resnet50': '/models/resnet50-19c8e357.pth',
    'resnet101': '/models/resnet101-5d3b4d8f.pth',
    'resnet152': '/models/resnet152-b121ed2d.pth',
    'resnext50_32x4d': '/models/resnext50_32x4d-7cdf4587.pth',
    'resnext101_32x8d': '/models/resnext101_32x8d-8ba56ff5.pth',
    'wide_resnet50_2': '/models/wide_resnet50_2-95faca4d.pth',
    'wide_resnet101_2': '/models/wide_resnet101_2-32ee1156.pth',
}
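To see this design principle concretely, here is a small illustrative sketch (my addition, assuming the full file below has been run): it pushes a dummy input through the four stages of the resnet18 configuration and prints the shapes. The spatial size halves at each stage while the channel count doubles.

import torch

# ResNet and BasicBlock are defined later in this file; [2, 2, 2, 2] is the resnet18 config
model = ResNet(BasicBlock, [2, 2, 2, 2])
x = torch.randn(1, 3, 224, 224)
x = model.maxpool(model.relu(model.bn1(model.conv1(x))))  # stem: 224 -> 56
for name in ['layer1', 'layer2', 'layer3', 'layer4']:
    x = getattr(model, name)(x)
    print(name, tuple(x.shape))
# layer1 (1, 64, 56, 56)
# layer2 (1, 128, 28, 28)
# layer3 (1, 256, 14, 14)
# layer4 (1, 512, 7, 7)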
# groups controls the connections between input and output: with groups=1 every output channel
# is a convolution over all input channels; with groups=2 there are effectively two side-by-side
# conv layers, each seeing half of the input channels and producing half of the output channels.
# Here groups=1, i.e. no grouping. dilation is reused as the padding so the spatial size is
# preserved. (In the Bottleneck below, the 3x3 conv does the actual high-dimensional feature
# extraction, while the 1x1 convs only change the channel dimension.)
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False, dilation=dilation)
def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
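A quick sanity check on these helpers (a sketch of mine, not part of the original file): conv3x3 preserves the spatial size because padding equals dilation, and groups splits the channels, dividing the weight count by the number of groups.

import torch

x = torch.randn(1, 64, 56, 56)
print(conv3x3(64, 128)(x).shape)            # torch.Size([1, 128, 56, 56]), size preserved
print(conv3x3(64, 128, stride=2)(x).shape)  # torch.Size([1, 128, 28, 28]), halved by stride

# groups=2 behaves like two parallel convs on half the channels each,
# so it holds half as many weights as the groups=1 version
full = conv3x3(64, 128, groups=1)
grouped = conv3x3(64, 128, groups=2)
print(full.weight.numel(), grouped.weight.numel())  # 73728 36864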
# Note: in conv3x3/conv1x1 above, bias is set to False. The reason: Batch Normalization follows
# each conv, and BN subtracts the per-channel mean, so any constant bias would be cancelled out.
# Since BN's own shift parameter can play the same role, the conv bias is redundant whenever BN
# is used; dropping it saves parameters and work during gradient descent.
# BasicBlock is designed for resnet18/34; these shallower architectures can do without Bottleneck.
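A minimal sketch (my addition) of why the bias is redundant: BN normalizes each channel with the batch statistics, so adding a constant per-channel offset before BN, which is exactly what a conv bias would do, leaves the output unchanged.

import torch
import torch.nn as nn

bn = nn.BatchNorm2d(8)
x = torch.randn(4, 8, 16, 16)
bias = torch.randn(1, 8, 1, 1)  # a constant per-channel offset, like a conv bias

out1 = bn(x)
out2 = bn(x + bias)  # BN subtracts the per-channel mean, so the offset cancels
print(torch.allclose(out1, out2, atol=1e-5))  # True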
class BasicBlock(nn.Module):
    expansion = 1
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d  # BatchNorm2d is standard in conv nets (it counteracts vanishing/exploding gradients); its argument is the conv's output channel count
            # it computes the per-channel mean and variance and normalizes the activations
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')  # why these limits? only Bottleneck implements the groups/base_width scaling
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)  # planes is the output channel count of conv1; BN is parameterized per output channel
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample  # downsampling branch for the shortcut
        self.stride = stride
    # Summary: this block is two 3x3 convs, each followed by BN, with a ReLU after the first and another after the addition. downsample exists because of the out + x addition: the two tensors must have the same shape, so the original input x may need to be projected.
    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)  # first conv, BN, activation
        out = self.conv2(out)
        out = self.bn2(out)  # second conv, BN (no ReLU until after the addition)
        if self.downsample is not None:  # when the dimensions differ, a 1x1 conv maps the input to the required channels so the addition works
            identity = self.downsample(x)  # i.e. downsample the shortcut if needed
        out += identity  # the residual connection: H(x) = F(x) + x, or H(x) = F(x) + Wx when projected
        out = self.relu(out)
        return out
# Note self.downsample = downsample: by default downsample=None, i.e. no downsampling, but it is needed whenever the BasicBlock's shortcut x has a different shape from the output. Spoiler: in ResNet the downsample is a 1x1 conv that maps x to the desired channel count. Why? Because x must be added to the output at the end, and tensors with different channel counts cannot be added.
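A small usage sketch (my own example, not in the original post): a stride-2 BasicBlock changes both the channel count and the spatial size, so the shortcut needs the 1x1 downsample described above for the addition to be valid.

import torch
import torch.nn as nn

# the shortcut must also go 64 -> 128 channels at stride 2 to match the main branch
shortcut = nn.Sequential(conv1x1(64, 128, stride=2), nn.BatchNorm2d(128))
block = BasicBlock(64, 128, stride=2, downsample=shortcut)

x = torch.randn(1, 64, 56, 56)
print(block(x).shape)  # torch.Size([1, 128, 28, 28])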
class Bottleneck(nn.Module):
    # expansion multiplies the output channel count; in the basic version, BasicBlock,
    # expansion is 1, i.e. no multiplication, and the output channel count equals planes.
    expansion = 4  # each Bottleneck expands its final output to four times planes
    __constants__ = ['downsample']

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups  # the 3x3 conv's width; it varies with base_width and groups (see ResNeXt below)
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
    def forward(self, x):
        identity = x  # the shortcut
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)  # 1x1 conv squeezes the channels down to width
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)  # 3x3 conv does the feature extraction
        out = self.conv3(out)
        out = self.bn3(out)  # 1x1 conv expands the channels again, then BN
        # In both BasicBlock and Bottleneck the final step checks whether x needs a downsample:
        # its channel count must match the main branch's output before the two can be added.
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        out = self.relu(out)
        return out
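Another illustrative sketch (mine): with planes=64 a Bottleneck emits 64 * expansion = 256 channels, so even at stride 1 the very first block of a stage needs a 1x1 projection on the shortcut.

import torch
import torch.nn as nn

shortcut = nn.Sequential(conv1x1(64, 256), nn.BatchNorm2d(256))
block = Bottleneck(64, 64, downsample=shortcut)  # output channels = 64 * 4 = 256

x = torch.randn(1, 64, 56, 56)
print(block(x).shape)  # torch.Size([1, 256, 56, 56])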
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer  # stored on self because _make_layer needs norm_layer too
        self.inplanes = 64  # the default input channel count of the first stage
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)  # 7x7 conv: 3 input channels, inplanes outputs, stride 2, padding 3, bias False
        self.bn1 = norm_layer(self.inplanes)  # normalization keeps the activations well-scaled
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # 3x3 max pooling, stride 2, padding 1
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        # Initialize the conv and BN layers, as also mentioned in the paper
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to /abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)
    # _make_layer's first argument, block, selects between the BasicBlock and Bottleneck classes;
    # the second, planes, is the stage's base channel count (NOT the output channel count: the
    # output is planes * block.expansion); the third, blocks, is how many residual blocks the
    # stage contains.
    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            # when stride != 1 or the channel counts do not match, build the downsample branch:
            # again a 1x1 conv (wrapped in a Sequential container) that projects the shortcut
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )
        layers = []  # e.g. layers=[3, 4, 6, 3] builds stages of 3, 4, 6 and 3 Bottlenecks in turn
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))  # the stage's first residual block goes into the list
        # the first block is handled separately because each stage halves the output size:
        # only the first block gets stride=2 (and the downsample); the rest keep the size
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):  # append the remaining residual blocks, completing the stage
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))
        return nn.Sequential(*layers)
    # ResNet has five stages. The first is a 7x7 conv with stride=2 and padding=3, followed by
    # BN, ReLU and max pooling, after which the feature map is already 1/4 of the input size.
    # The next four stages are layer1..layer4, each produced by _make_layer; the caller supplies
    # the number of blocks per stage via the layers list (e.g. [3, 4, 6, 3]).
    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)  # stage one: a plain conv plus pooling shrinks the input to 1/4
        # layer1..layer4 all come from _make_layer with different arguments; its blocks argument
        # is layers[0..3], i.e. the per-stage block counts such as [3, 4, 6, 3]
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x
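To tie the pieces together, here is an illustrative sketch (my addition) that builds the resnet18 configuration directly from the classes above, checks the output shape, and shows what zero_init_residual does:

import torch

model = ResNet(BasicBlock, [2, 2, 2, 2], zero_init_residual=True)  # resnet18 layout
x = torch.randn(2, 3, 224, 224)
print(model(x).shape)  # torch.Size([2, 1000])

# each stage holds the requested number of blocks...
print([len(getattr(model, n)) for n in ['layer1', 'layer2', 'layer3', 'layer4']])  # [2, 2, 2, 2]
# ...and zero_init_residual zeroed the last BN of every block, so each block starts as an identity
print(model.layer1[0].bn2.weight.abs().sum().item())  # 0.0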
def _resnet(arch, block, layers, pretrained, progress, **kwargs):
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch],
                                              progress=progress)
        model.load_state_dict(state_dict)
    return model
def resnet18(pretrained=False, progress=True, **kwargs):
    r"""ResNet-18 model from
    `"Deep Residual Learning for Image Recognition" </pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
                   **kwargs)
def resnet34(pretrained=False, progress=True, **kwargs):
    r"""ResNet-34 model from
    `"Deep Residual Learning for Image Recognition" </pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
                   **kwargs)
def resnet50(pretrained=False, progress=True, **kwargs):
    r"""ResNet-50 model from
    `"Deep Residual Learning for Image Recognition" </pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
                   **kwargs)
def resnet101(pretrained=False, progress=True, **kwargs):
    r"""ResNet-101 model from
    `"Deep Residual Learning for Image Recognition" </pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
                   **kwargs)
def resnet152(pretrained=False, progress=True, **kwargs):
    r"""ResNet-152 model from
    `"Deep Residual Learning for Image Recognition" </pdf/1512.03385.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
                   **kwargs)
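As a sanity check on the naming (my own arithmetic, not from the original post): the depth counts every conv plus the final fc layer. Each Bottleneck contains 3 convs, and on top of the four stages there are the stem conv and the fc:

blocks = [3, 8, 36, 3]       # Bottleneck counts per stage for resnet152
depth = 3 * sum(blocks) + 2  # 3 convs per Bottleneck + stem conv + fc
print(depth)                 # 152 (the same formula gives 50 for [3, 4, 6, 3])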
def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
    r"""ResNeXt-50 32x4d model from
    `"Aggregated Residual Transformation for Deep Neural Networks" </pdf/1611.05431.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 4
    return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
                   pretrained, progress, **kwargs)
def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
    r"""ResNeXt-101 32x8d model from
    `"Aggregated Residual Transformation for Deep Neural Networks" </pdf/1611.05431.pdf>`_
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    kwargs['groups'] = 32
    kwargs['width_per_group'] = 8
    return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
                   pretrained, progress, **kwargs)
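Finally, a quick illustration (my addition) of how the Bottleneck width formula behaves for ResNeXt: the "32x4d" in the name means groups=32 and width_per_group=4, so in the first stage (planes=64) the 3x3 conv runs 32 groups of 4 channels each:

planes, base_width, groups = 64, 4, 32  # first stage of resnext50_32x4d
width = int(planes * (base_width / 64.)) * groups
print(width)                            # 128 = 32 groups x 4 channels per group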