CV算法复现(分类算法26):AlexNet(2012年Hinton组)致谢:霹雳吧啦Wz:
⽬录
1 本次要点
1.1 深度学习理论
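1. 经过一次卷积操作后,图像新尺寸计算公式:$N = (W - F + 2P)/S + 1$,其中W为输入图像尺寸,F为卷积核尺寸,P为padding大小,S为步长。(如果padding [p1, p2]中p1,p2不相等,那么公式中2P就变为P1+P2)(如果结果
值不是整数,pytorch中会自动忽略最后一行以及最后一列,以保证N为整数。例如输入224、F=11、P=2、S=4时,(224-11+4)/4+1=55.25,取整后N=55。)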
2.
3.
1.2 pytorch框架语法
1. pytorch可以自定义网络权重的初始化方法(见model.py)。
2. pata = list(net.parameters()) #查看模型参数
2 ⽹络简介
2.1 历史意义
AlexNet是2012年ImageNet图像分类冠军网络,分类准确率由传统的 70%+直接提升到 80%+。在那年之后,深
度学习开始迅速发展。
2.2 ⽹络亮点
1. ⾸次利⽤ GPU 进⾏⽹络加速训练。
2. 使⽤了 ReLU 激活函数,⽽不是传统的 Sigmoid 激活函数以及 Tanh 激活函数。
3. 在前两层的全连接层中使⽤了 Dropout 随机失活神经元操作,以减少过拟合。
2.3 ⽹络架构
备注:padding: [1, 2]即图像最左边缘加1列0,最右边缘加2列0。图像最上边缘加1⾏0,图像最下边缘加2⾏0。
3 代码结构
model.py
train.py
predict.py
split_data.py(数据集划分)
3.1 model.py
import torch
import torch.nn as nn


# Compared with the original paper, this AlexNet reproduction halves the
# number of convolution kernels in every layer.
class AlexNet(nn.Module):
    """AlexNet-style image classifier with half the paper's channel widths.

    Args:
        num_class: Number of output classes (size of the final linear layer).
        init_weights: When True, re-initialise conv/linear weights with
            ``_initialize_weights`` instead of relying on the framework
            defaults.

    The shape comments below assume a ``[3, 224, 224]`` input image.
    """

    def __init__(self, num_class=1000, init_weights=False):
        super(AlexNet, self).__init__()
        # nn.Sequential packs a series of layers into one module, so each
        # layer does not need its own attribute.
        self.features = nn.Sequential(
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),   # input[3, 224, 224]  output[48, 55, 55]
            # inplace=True overwrites the input tensor instead of allocating
            # a new one, which lowers memory use (default is False).
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # output[48, 27, 27]
            nn.Conv2d(48, 128, kernel_size=5, padding=2),            # output[128, 27, 27]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # output[128, 13, 13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),           # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),           # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),           # output[128, 13, 13]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                   # output[128, 6, 6]
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            # Input: 128 channels * 6 * 6 feature map, flattened to 1-D in forward().
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048, num_class),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Run the feature extractor and classifier; returns per-class logits."""
        x = self.features(x)
        # Tensor layout is [B, C, H, W]; start_dim=1 flattens everything
        # after the batch dimension into a single vector per sample.
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Custom weight init (the framework has defaults; this overrides them).

        Conv layers get Kaiming-normal weights, linear layers get
        N(0, 0.01) weights, and every bias is set to zero.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
3.2 train.py
import json
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets, utils

from model import AlexNet
"""
数据集:花分类(5类)
"""
def main():
    """Train AlexNet on the flower dataset (5 classes) and keep the best weights.

    Reads images from ``<cwd>/../../data_set/flower_data/{train,val}``,
    writes the index-to-class mapping to ``class_indices.json``, trains for
    10 epochs and saves the state dict with the highest validation accuracy
    to ``./AlexNet.pth``.

    Raises:
        FileNotFoundError: If the dataset directory does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),  # random horizontal flip
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        "val": transforms.Compose([transforms.Resize((224, 224)),  # cannot 224, must (224, 224)
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    # os.getcwd() is the current working directory; "../.." goes up two levels.
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    if not os.path.exists(image_path):
        raise FileNotFoundError("{} path does not exist.".format(image_path))

    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    # Swap keys and values so a predicted index maps straight to its class name.
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    # os.cpu_count() may return None, so fall back to 1 before taking min().
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=4, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images fot validation.".format(train_num,
                                                                           val_num))

    # Optional: preview one validation batch (uncomment to use).
    # test_data_iter = iter(validate_loader)
    # test_image, test_label = next(test_data_iter)
    #
    # def imshow(img):
    #     img = img / 2 + 0.5  # unnormalize
    #     npimg = img.numpy()
    #     plt.imshow(np.transpose(npimg, (1, 2, 0)))
    #     plt.show()
    #
    # print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
    # imshow(utils.make_grid(test_image))

    net = AlexNet(num_class=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    # pata = list(net.parameters())  # inspect model parameters (for debugging)
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    save_path = './AlexNet.pth'
    best_acc = 0.0
    num_batches = len(train_loader)
    for epoch in range(10):
        # Training phase. net.eval() is called below, so switch back to
        # train mode every epoch; otherwise dropout stays disabled.
        net.train()
        running_loss = 0.0
        t1 = time.perf_counter()
        for step, data in enumerate(train_loader, start=0):
            images, labels = data
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()  # back-propagation
            optimizer.step()  # update the parameters

            # print statistics
            running_loss += loss.item()
            # print train process
            rate = (step + 1) / num_batches
            a = "*" * int(rate * 50)
            b = "." * int((1 - rate) * 50)
            print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(int(rate * 100), a, b, loss.item()), end="")
        print()
        print(time.perf_counter()-t1)

        # Validation phase: eval mode turns dropout off.
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():  # no gradients needed for validation
            for val_data in validate_loader:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                acc += (predict_y == val_labels.to(device)).sum().item()
        val_accurate = acc / val_num
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)
        # Average over the number of batches; dividing by the last index
        # would be off by one and fail when there is only a single batch.
        print('[epoch %d] train_loss: %.3f test_accuracy: %.3f' %
              (epoch + 1, running_loss / max(num_batches, 1), val_accurate))

    print('Finished Training')


if __name__ == '__main__':
    main()
训练结果: