Attention Mechanisms in CV (Part 1): SENet (Channel Attention)


SENet
Implementation Steps
1. Apply global average pooling to the input feature map.
2. Pass the result through two fully connected layers: the first has fewer neurons (reduced by the ratio), and the second restores the neuron count to the number of channels of the input feature map.
3. After the two fully connected layers, apply a Sigmoid to squash the values into the range 0-1; this gives a weight between 0 and 1 for every channel of the input feature map.
4. Multiply the input feature map by these weights (see the shape walkthrough below).
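As a quick illustration of these four steps, here is a minimal sketch of the shape flow; the tensor sizes (batch 2, 64 channels, 32x32) are made up for the example:

import torch
import torch.nn as nn

# made-up example: a batch of 2 feature maps with 64 channels and 32x32 spatial size
x = torch.randn(2, 64, 32, 32)

# step 1: global average pooling -> (2, 64, 1, 1)
z = nn.AdaptiveAvgPool2d((1, 1))(x)

# steps 2-3: squeeze to 64 // 16 = 4 neurons, expand back to 64, then Sigmoid
fc = nn.Sequential(
    nn.Linear(64, 64 // 16, bias=False),
    nn.ReLU(inplace=True),
    nn.Linear(64 // 16, 64, bias=False),
    nn.Sigmoid(),
)
w = fc(z.view(2, 64)).view(2, 64, 1, 1)  # per-channel weights in (0, 1)

# step 4: reweight the input feature map channel by channel
y = x * w  # shape stays (2, 64, 32, 32)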
Flow Diagram
Hands-on: Combining SE with ResNet
In the figure, the left side is the original residual block and the right side is the residual block with the SE module added.
Testing
My own laptop only has a 3070, so I won't test on ImageNet-1k; a quick run on ImageNet-mini will have to do. Here are the final results first:
As the figure clearly shows, adding the SE module to ResNet lowers the error rate considerably. On the other hand, the SE module also adds extra computation, which is the main reason it has not been adopted everywhere despite the accuracy gain. Without further ado, on to the code; I'll keep the comments brief and trust that you can follow it.
SE Block
import torch
import torch.nn as nn


class se_block(nn.Module):
    def __init__(self, channels, ratio=16):
        super(se_block, self).__init__()
        # squeeze: global average pooling down to 1x1 per channel
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        # excitation: bottleneck FC (channels -> channels // ratio -> channels) + Sigmoid
        self.fc = nn.Sequential(
            nn.Linear(channels, channels // ratio, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channels // ratio, channels, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)      # (b, c, h, w) -> (b, c)
        y = self.fc(y).view(b, c, 1, 1)      # per-channel weights in (0, 1)
        return x * y.expand_as(x)            # rescale the input channels
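A quick sanity check of the block (a minimal sketch; the feature-map size is arbitrary):

# apply the SE block to a random feature map and confirm the shape is unchanged
se = se_block(channels=256, ratio=16)
feat = torch.randn(4, 256, 14, 14)
out = se(feat)
print(out.shape)  # torch.Size([4, 256, 14, 14])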
Model Code
import torch
import torch.nn as nn
from module.se_module import se_block


class SEBottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, out_planes, stride=1,
                 downsample=None, reduction=16):
        super(SEBottleneck, self).__init__()
        # 1x1 -> 3x3 -> 1x1 bottleneck, same as the standard ResNet bottleneck
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_planes)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.conv3 = nn.Conv2d(out_planes, out_planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes * 4)
        self.relu = nn.ReLU(inplace=True)
        # SE module sits after the last conv/bn, before the residual addition
        self.se = se_block(out_planes * 4, ratio=reduction)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)
        out = self.se(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)
        return out
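To see what a single block does to the feature-map shape, here is a small sketch; the hand-built downsample branch below only exists to make the stride-2 example run (in the full model it is created inside _make_layer):

# one SE bottleneck with stride 2: 256 input channels -> 512 output channels, half the resolution
downsample = nn.Sequential(
    nn.Conv2d(256, 128 * SEBottleneck.expansion, kernel_size=1, stride=2, bias=False),
    nn.BatchNorm2d(128 * SEBottleneck.expansion),
)
block = SEBottleneck(256, 128, stride=2, downsample=downsample)
x = torch.randn(1, 256, 28, 28)
print(block(x).shape)  # torch.Size([1, 512, 14, 14])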
class ResNet(nn.Module):
    def __init__(self, block, blocks_num, num_class=1000, include_top=True):
        super(ResNet, self).__init__()
        self.include_top = include_top
        self.in_channel = 64
        # stem: 7x7 conv + BN + ReLU + max pooling
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)
        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)
            self.fc = nn.Linear(512 * block.expansion, num_class)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, channel, block_num, stride=1):
        downsample = None
        if stride != 1 or self.in_channel != channel * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(channel * block.expansion))

        layers = []
        layers.append(block(self.in_channel,
                            channel,
                            downsample=downsample,
                            stride=stride))
        self.in_channel = channel * block.expansion

        for _ in range(1, block_num):
            layers.append(block(self.in_channel,
                                channel))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        if self.include_top:
            x = self.avgpool(x)
            x = torch.flatten(x, 1)
            x = self.fc(x)
        return x


def se_resnet50(num_class=1000, include_top=True):
    return ResNet(SEBottleneck, [3, 4, 6, 3], num_class=num_class, include_top=include_top)
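A minimal sketch of how to instantiate the model and gauge the extra cost of the SE modules; the 100-class setting matches the ImageNet-mini experiment above:

model = se_resnet50(num_class=100)
x = torch.randn(1, 3, 224, 224)
print(model(x).shape)  # torch.Size([1, 100])

# rough measure of the SE overhead: count the parameters that live inside the se_block modules
total_params = sum(p.numel() for p in model.parameters())
se_params = sum(p.numel() for name, p in model.named_parameters() if '.se.' in name)
print(total_params, se_params)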
Training and Testing
import os
import torch
import numpy as np
import torch.nn as nn
from tqdm import tqdm
import torch.optim as optim
import matplotlib.pylab as plt
from module.se_resnet import se_resnet50
# from resnet import resnet50  # plain ResNet baseline (module path assumed)
from data import se_main


def main():
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Using {} device for train.'.format(device))

    train_num, train_loader, val_num, val_loader = se_main()

    weights_path = r'resnet50-60a8950a85b2b.pkl'
    model = se_resnet50(num_class=100)
    #
    # weights_path = r'resnet50-0676ba61.pth'
    # model = resnet50(num_class=100)

    # load only the pretrained weights whose shapes match, skip the rest (e.g. the new fc layer)
    pre_weights = torch.load(weights_path, map_location=device)
    pre_dict = {k: v for k, v in pre_weights.items() if model.state_dict()[k].numel() == v.numel()}
    missing_keys, unexpected_keys = model.load_state_dict(pre_dict, strict=False)
    model.to(device)

    loss_function = nn.CrossEntropyLoss()
    params = [p for p in model.parameters() if p.requires_grad]
    # optimizer = optim.SGD(params, lr=0.3, momentum=0.9, weight_decay=1e-4)
    optimizer = optim.Adam(params, lr=2e-4)

    epochs = 40
    save_path = 'se_resnet50.pkl'
    # save_path = 'resnet50.pth'
    best_acc = 0.0
    train_steps = len(train_loader)
    loss_list = []

    for epoch in range(epochs):
        # train
        model.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            outputs = model(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            train_bar.desc = 'train epoch[{}/{}] loss:{:.3f}'.format(epoch + 1, epochs, loss)
        loss_list.append(running_loss / train_steps)

        # validate
        model.eval()
        acc = 0.0
        with torch.no_grad():
            val_bar = tqdm(val_loader)
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = model(val_images.to(device))
                _, predict_y = torch.max(outputs, dim=1)
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_acc = acc / val_num
        print('[epoch %d] training loss:%.3f  val_accuracy:%.3f'
              % (epoch + 1, running_loss / train_steps, val_acc))

        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), save_path)

    print('Finished Training!')
    print('Best accuracy is {}'.format(best_acc))


if __name__ == '__main__':
    main()
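The data helper se_main is not shown in this post. As a rough sketch of what it might look like for ImageNet-mini, the directory layout, transforms, and batch size below are my assumptions rather than the author's actual data.py:

# hypothetical data.py sketch: build ImageNet-mini loaders with torchvision (paths are assumed)
import torch
from torchvision import datasets, transforms

def se_main(root='imagenet-mini', batch_size=32):
    train_tf = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    val_tf = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    train_set = datasets.ImageFolder(root + '/train', transform=train_tf)
    val_set = datasets.ImageFolder(root + '/val', transform=val_tf)
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size, shuffle=False)
    return len(train_set), train_loader, len(val_set), val_loader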
Reposting and questions are welcome.
