Learning and Implementing Residual Networks (ResNet)


ResNet has now reached a V2 version. How does V2 differ from V1? Below is my personal summary after reading the paper:
First, the comparison figure:
[Figure: V1 (post-activation) residual block on the left, V2 (pre-activation) on the right]
The left is V1 and the right is V2. The main difference is the order of BN and ReLU relative to the weight (convolution) layers and the addition, i.e. the post-activation vs. pre-activation distinction discussed in the paper.
Combined with the formulas, you can see that for V1 the function f applied after the addition is ReLU, while for V2 it is a plain identity mapping. This brings two benefits:
1. Optimization of very deep networks (the paper experiments with a 1001-layer ResNet) becomes easier. Because of the ReLU, the shortcut path is blocked whenever the signal is negative, so training is very slow early on. With an identity mapping, the signal propagates freely through every layer in both early and late training, which makes the network easier to train. The effect is not severe at moderate depth (below 164 layers in the experiments).
2. Less overfitting. In V1 the input is batch-normalized, but after the shortcut addition the signal fed into the next weight layer is no longer normalized. In V2 the signal entering every weight layer is normalized, which helps reduce overfitting.
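Written out (roughly in the notation of the V2 paper, where x_l is the input to the l-th residual unit, F is the residual branch, and W_l its weights), the two variants are:

$$y_l = x_l + \mathcal{F}(x_l, W_l), \qquad x_{l+1} = f(y_l)$$

For V1 (post-activation) $f$ is ReLU, so $x_{l+1} = \mathrm{ReLU}\bigl(x_l + \mathcal{F}(x_l, W_l)\bigr)$; for V2 (pre-activation) $f$ is the identity, so $x_{l+1} = x_l + \mathcal{F}(x_l, W_l)$ and the BN/ReLU pair moves in front of the weight layers inside $\mathcal{F}$.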
To summarize: for networks with fewer layers you can keep using the V1 block; for deeper networks, use the V2 block.
Finally, a PyTorch implementation of ResNet34 in both variants:
# ResNet34 using v1 and v2 blocks
from torch import nn
from torch.nn import functional as F
class ResidualBlockV1(nn.Module):
    # Residual block v1 (post-activation)
    def __init__(self, inchannel, outchannel, stride=1, shortcut=None):
        super(ResidualBlockV1, self).__init__()
        self.basic = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, 3, stride, 1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, 3, 1, 1, bias=False),
            nn.BatchNorm2d(outchannel))
        self.shortcut = shortcut  # shortcut function is customizable
    def forward(self, x):
        out = self.basic(x)
        residual = x if self.shortcut is None else self.shortcut(x)
        out += residual
        return F.relu(out)  # final ReLU after the addition (post-activation)
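# For illustration only: when a block changes the channel count or the stride,
# a 1x1 conv shortcut must be supplied so the shapes match at the addition, e.g.
#   down = nn.Sequential(nn.Conv2d(64, 128, 1, 2, bias=False), nn.BatchNorm2d(128))
#   block = ResidualBlockV1(64, 128, stride=2, shortcut=down)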
class ResidualBlockV2(nn.Module):
    # Residual block v2 (pre-activation)
    def __init__(self, inchannel, outchannel, stride=1, shortcut=None):
        super(ResidualBlockV2, self).__init__()
        self.basic = nn.Sequential(
            nn.BatchNorm2d(inchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(inchannel, outchannel, 3, stride, 1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, 3, 1, 1, bias=False))
        self.shortcut = shortcut  # shortcut function is customizable
    def forward(self, x):
        out = self.basic(x)
        residual = x if self.shortcut is None else self.shortcut(x)
        out += residual
        return out  # identity mapping: no activation after the addition
class make_layer(nn.Module):
    # Reusable stage made of several residual blocks
    def __init__(self,
                 inchannel,
                 outchannel,
                 block_num,
                 stride=1,
                 use_v1_block=True):
        super(make_layer, self).__init__()
        layers = []
        # projection shortcut for the first block of the stage
        shortcut = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, 1, stride, bias=False),
            nn.BatchNorm2d(outchannel))
        if use_v1_block:
            layers.append(ResidualBlockV1(inchannel, outchannel, stride, shortcut))
            for i in range(1, block_num):
                layers.append(ResidualBlockV1(outchannel, outchannel))
        else:
            layers.append(ResidualBlockV2(inchannel, outchannel, stride, shortcut))
            for i in range(1, block_num):
                layers.append(ResidualBlockV2(outchannel, outchannel))
        self.layer = nn.Sequential(*layers)
    def forward(self, x):
        out = self.layer(x)
        return out
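# For illustration only: make_layer(64, 128, 4, stride=2) builds a stage of four
# blocks that halves the spatial size and doubles the channels; the 1x1 conv
# shortcut above handles the shape change in the first block.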
class ResNet34V1(nn.Module):
    def __init__(self):
        super(ResNet34V1, self).__init__()
        self.pre = nn.Sequential(
            nn.Conv2d(3, 64, 7, 2, 3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, 2, 1))
        # build each stage
        self.layer1 = make_layer(64, 64, 3)
        self.layer2 = make_layer(64, 128, 4, stride=2)
        self.layer3 = make_layer(128, 256, 6, stride=2)
        self.layer4 = make_layer(256, 512, 3, stride=2)
        self.fc = nn.Linear(512, 1000)
    def forward(self, x):
        x = self.pre(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = F.avg_pool2d(x, 7)
        x = x.view(x.size(0), -1)
        return self.fc(x)
class ResNet34V2(nn.Module):
    def __init__(self):
        super(ResNet34V2, self).__init__()
        self.pre = nn.Sequential(
            nn.Conv2d(3, 64, 7, 2, 3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, 2, 1))
        # build each stage
        self.layer1 = make_layer(64, 64, 3, use_v1_block=False)
        self.layer2 = make_layer(64, 128, 4, stride=2, use_v1_block=False)
        self.layer3 = make_layer(128, 256, 6, stride=2, use_v1_block=False)
        self.layer4 = make_layer(256, 512, 3, stride=2, use_v1_block=False)
        self.fc = nn.Linear(512, 1000)
    def forward(self, x):
        x = self.pre(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = F.avg_pool2d(x, 7)
        x = x.view(x.size(0), -1)
        return self.fc(x)
print('This is ResNet34 v1:')
print(ResNet34V1())
print('This is ResNet34 v2:')
print(ResNet34V2())
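As a quick sanity check (assuming a standard 224x224 RGB input, which is what the 7x7 average pooling above expects), a random tensor can be pushed through both networks; each should return a batch of 1000 logits:

import torch
x = torch.randn(1, 3, 224, 224)   # dummy batch with one 224x224 RGB image
print(ResNet34V1()(x).shape)      # expected: torch.Size([1, 1000])
print(ResNet34V2()(x).shape)      # expected: torch.Size([1, 1000])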
