【Pytorch卷1】=9=注意力机制-LSTM【监督学习】

更新时间:2023-07-09 12:02:05 阅读: 评论:0

【Pytorch卷1】=9=注意力机制-LSTM【监督学习】
目录
1. Introduction
使用掩码(mask)来形成注意力机制。掩码的原理在于通过另一层新的权重,将输入数据中关键的特征标识出来。
注意力分为强(hard)注意力和弱(soft)注意力两种。
Y=Multihead(Q, K, V),其中Y表示注意力结果,Q为查询(Query),K为键(Key),V为值(Value)。任务可以描述为:以Q为查询条件,在K中找到匹配的键,并取出其对应的V。
2. 注意力机制-LSTM走一走
构建一个myLSTMNet网络结构,在模型中搭建LSTM层、注意力层和全连接层
2.1 依照之前的做法,准备Fashion-MNIST数据集
import torchvision
import torchvision.transforms as tranforms

# Directory that is expected to already contain the Fashion-MNIST files
# (download=False below means nothing is fetched automatically).
data_dir = './fashion_mnist/'
# ToTensor converts each PIL image into a float tensor.
tranform = tranforms.Compose([tranforms.ToTensor()])
train_dataset = torchvision.datasets.FashionMNIST(
    data_dir, train=True, transform=tranform, download=False)

print("训练数据集条数", len(train_dataset))
val_dataset = torchvision.datasets.FashionMNIST(
    root=data_dir, train=False, transform=tranform)
print("测试数据集条数", len(val_dataset))

# Show the first training image together with its label.
import pylab
im = train_dataset[0][0]
im = im.reshape(-1, 28)  # drop the channel axis so pylab gets a 2-D array
pylab.imshow(im)
pylab.show()
print("该图⽚的标签为:", train_dataset[0][1])

# Build the data loaders.
import torch
batch_size = 10
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False)

from matplotlib import pyplot as plt
import numpy as np


def imshow(img):
    """Draw an image tensor on the current matplotlib axes.

    Args:
        img: A tensor laid out as (channels, height, width), e.g. the
            output of ``torchvision.utils.make_grid``.
    """
    print("图⽚形状:", np.shape(img))
    npimg = img.numpy()
    plt.axis('off')
    # matplotlib expects (height, width, channels), so move channels last.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

# Human-readable names for the ten Fashion-MNIST labels, indexed by label id.
classes = ('T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat',
           'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle_Boot')

# Pull one batch from the training loader and preview it.
sample = iter(train_loader)
images, labels = next(sample)
print('样本形状:', np.shape(images))
print('样本标签:', labels)
imshow(torchvision.utils.make_grid(images, nrow=batch_size))
print(','.join('%5s' % classes[labels[j]] for j in range(len(images))))
2.2 搭建LSTM层,并引入注意力层
class myLSTMNet(torch.nn.Module):
    """Stacked LSTM followed by an attention layer and a linear classifier.

    Args:
        in_dim: Number of features per time step.
        hidden_dim: Hidden size of every LSTM layer.
        n_layer: Number of stacked LSTM layers.
        n_class: Number of output classes.
    """

    def __init__(self, in_dim, hidden_dim, n_layer, n_class):
        super(myLSTMNet, self).__init__()
        # Recurrent part; batch_first=True means inputs are (batch, seq, feature).
        self.lstm = torch.nn.LSTM(in_dim, hidden_dim, n_layer, batch_first=True)
        # The classifier consumes all time steps flattened together, so the
        # sequence length is fixed at 28 here.
        self.Linear = torch.nn.Linear(hidden_dim * 28, n_class)
        # Attention over the LSTM outputs; weights <= 0.03 are zeroed.
        self.attention = AttentionSeq(hidden_dim, hard=0.03)

    def forward(self, t):
        """Return class scores of shape (batch, n_class) for input ``t``."""
        t, _ = self.lstm(t)            # LSTM outputs for every time step
        t = self.attention(t)          # re-weight the outputs
        t = t.reshape(t.shape[0], -1)  # flatten time and hidden dimensions
        # Alternative without flattening: keep only the last time step,
        # t = t[:, -1, :] (the linear layer would then take hidden_dim inputs).
        out = self.Linear(t)
        return out
2.3 接着搭建注意力机制类(AttentionSeq)
class AttentionSeq(torch.nn.Module):
    """Attention over the time axis of a (batch, time_step, hidden_dim) tensor.

    Args:
        hidden_dim: Size of the last dimension of the input features.
        hard: Threshold for hard mode. When non-zero, attention weights that
            are not greater than ``hard`` are set to 0. ``0`` disables it.
    """

    def __init__(self, hidden_dim, hard=0):
        super(AttentionSeq, self).__init__()
        self.hidden_dim = hidden_dim
        self.dense = torch.nn.Linear(hidden_dim, hidden_dim)
        self.hard = hard

    def forward(self, features, mean=False):
        """Apply attention weights to ``features``.

        Args:
            features: Tensor of shape (batch, time_step, hidden_dim).
            mean: If True, average the weights over the hidden dimension so
                every hidden unit of a time step shares one weight.

        Returns:
            Tensor with the same shape as ``features``.
        """
        batch_size, time_step, hidden_dim = features.size()
        weight = torch.tanh(self.dense(features))

        # A time step whose features are all zero is treated as padding.
        mask_idx = torch.sign(torch.abs(features).sum(dim=-1))
        mask_idx = mask_idx.unsqueeze(-1).repeat(1, 1, hidden_dim)

        # torch.where keeps `weight` where the condition holds and takes the
        # third argument elsewhere. Padding gets a huge negative score rather
        # than 0, because softmax would still give a score of 0 a share.
        weight = torch.where(mask_idx == 1, weight,
                             torch.full_like(mask_idx, (-2 ** 32 + 1)))
        weight = weight.transpose(2, 1)  # -> (batch, hidden_dim, time_step)

        # Attention scores: normalise over the time axis.
        weight = torch.softmax(weight, dim=2)
        if self.hard != 0:  # hard mode: drop small weights entirely
            weight = torch.where(weight > self.hard, weight,
                                 torch.full_like(weight, 0))

        if mean:
            weight = weight.mean(dim=1)
            weight = weight.unsqueeze(1)
            weight = weight.repeat(1, hidden_dim, 1)
        weight = weight.transpose(2, 1)  # back to (batch, time_step, hidden_dim)

        # Apply the attention scores to the input.
        features_attention = weight * features
        return features_attention
# Instantiate the model: each 28x28 image is read as a sequence of 28 rows
# with 28 features each; 2 stacked LSTM layers with 128 hidden units; 10 classes.
network = myLSTMNet(28, 128, 2, 10)
2.4 输入数据并训练模型(与之前一致)
# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
network.to(device)
print(network)  # print the network structure

criterion = torch.nn.CrossEntropyLoss()  # classification loss
optimizer = torch.optim.Adam(network.parameters(), lr=.01)

for epoch in range(2):  # iterate over the training set twice
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):  # fetch one batch at a time
        inputs, labels = data
        inputs = inputs.squeeze(1)  # drop the channel axis: (batch, 28, 28)
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()  # clear gradients left from the previous step
        outputs = network(inputs)
        loss = criterion(outputs, labels)  # compute the loss
        loss.backward()   # back-propagate
        optimizer.step()  # update the parameters

        running_loss += loss.item()
        if i % 1000 == 999:
            # The loss is accumulated over 1000 batches, so average over 1000.
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 1000))
            running_loss = 0.0

print('Finished Training')

# Use the model: predict one batch from the test loader.
dataiter = iter(test_loader)
images, labels = next(dataiter)
inputs, labels = images.to(device), labels.to(device)

imshow(torchvision.utils.make_grid(images, nrow=batch_size))
print('真实标签: ', ' '.join('%5s' % classes[labels[j]] for j in range(len(images))))
inputs = inputs.squeeze(1)  # drop the channel axis before feeding the LSTM
outputs = network(inputs)
_, predicted = torch.max(outputs, 1)  # index of the highest score per sample

print('预测结果: ', ' '.join('%5s' % classes[predicted[j]]
                              for j in range(len(images))))

# Evaluate the model: per-class accuracy over the whole test set.
class_correct = [0.] * 10
class_total = [0.] * 10
with torch.no_grad():  # no gradients are needed for evaluation
    for data in test_loader:
        images, labels = data
        images = images.squeeze(1)
        inputs, labels = images.to(device), labels.to(device)
        outputs = network(inputs)
        _, predicted = torch.max(outputs, 1)
        c = predicted == labels  # one boolean per sample
        # Use the real batch length so a short final batch is also handled.
        for i in range(len(labels)):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

sumacc = 0
for i in range(10):
    Accuracy = 100 * class_correct[i] / class_total[i]
    print('Accuracy of %5s : %2d %%' % (classes[i], Accuracy))
    sumacc = sumacc + Accuracy
# Mean of the ten per-class accuracies.
print('Accuracy of all : %2d %%' % (sumacc / 10.))

本文发布于:2023-07-09 12:02:05,感谢您对本站的认可!

本文链接:https://www.wtabcd.cn/fanwen/fan/90/171942.html

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。

标签:注意   数据   模型   机制   定义   损失
相关文章
留言与评论(共有 0 条评论)
   
验证码:
Copyright ©2019-2022 Comsenz Inc.Powered by © 专利检索| 网站地图