【NLP】textCNN for Chinese Text Classification (PyTorch): the textCNN Model Explained
Implementation reference
Core code
Model implemented with Conv1d: model.py
Dimension changes
embedding: (batch_size, q_length sentence length) --> (batch_size, q_length, embedding_size word-vector dimension)
(128, 20) --> (128, 20, 60)
permute(): reorder the dimensions for the convolution that follows
(batch_size, q_length, embedding_size) --> (batch_size, embedding_size, q_length)
(128, 20, 60) --> (128, 60, 20)
conv1d: taking kernel_size = 3 as an example, (batch_size, embedding_size, q_length) --> (batch_size, kernel_num, conv1d_out); (128, 60, 20) --> (128, 16, 18), with kernel_num = 16 and conv1d_out = 20 - 3 + 1 = 18
PS: kernel_num is the number of output channels of the convolution
relu: the activation does not change the shape
max_pool1d: (batch_size, kernel_num, conv1d_out) --> (batch_size, kernel_num, 1)
(128, 16, 18) --> (128, 16, 1)
squeeze(2): remove the size-1 dimension
(batch_size, kernel_num, 1) --> (batch_size, kernel_num)
(128, 16, 1) --> (128, 16)
cat(): concatenate the pooled vectors produced by the different kernel_sizes
(batch_size, kernel_num) --> (batch_size, kernel_num * number of kernel_sizes)
(128, 16) --> (128, 16*3)
PS: for every kernel_size the steps above yield a vector of shape (batch_size, kernel_num), i.e. (128, 16)
linear: (batch_size, kernel_num * number of kernel_sizes) --> (batch_size, class_num number of classes)
(128, 16*3) --> (128, 5)
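To make the numbers above concrete, here is a minimal shape-check sketch of the kernel_size = 3 branch; the dummy tensor and the standalone layer below are illustrative only, not part of model.py:

import torch
import torch.nn as nn
import torch.nn.functional as F

batch_size, q_length, embed_dim, kernel_num = 128, 20, 60, 16
x = torch.randn(batch_size, q_length, embed_dim)   # stand-in for the embedding output
x = x.permute(0, 2, 1)                              # (128, 60, 20)
conv = nn.Conv1d(embed_dim, kernel_num, 3)          # kernel_size = 3
x = F.relu(conv(x))                                 # (128, 16, 18), 18 = 20 - 3 + 1
x = F.max_pool1d(x, x.size(2)).squeeze(2)           # (128, 16)
print(x.shape)                                      # torch.Size([128, 16])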
import torch
import torch.nn as nn
from torch.nn import functional as F
import math
from torch.nn import init
class textCNN(nn.Module):
    def __init__(self, param):
        super(textCNN, self).__init__()
        kernel_num = param['kernel_num']    # output channel size, 16
        kernel_size = param['kernel_size']  # [3, 4, 5]
        vocab_size = param['vocab_size']    # 22906
        embed_dim = param['embed_dim']      # 60
        dropout = param['dropout']          # 0.5
        class_num = param['class_num']      # 5
        self.param = param
        self.embed = nn.Embedding(vocab_size, embed_dim)
        # nn.Conv1d(in_channels, out_channels, kernel_size, stride=1, padding=0);
        # a Conv1d kernel spans all input channels, i.e. the word-vector dimension (60)
        self.conv11 = nn.Conv1d(embed_dim, kernel_num, kernel_size[0])
        self.conv12 = nn.Conv1d(embed_dim, kernel_num, kernel_size[1])
        self.conv13 = nn.Conv1d(embed_dim, kernel_num, kernel_size[2])
        self.dropout = nn.Dropout(dropout)  # Dropout(p=0.5, inplace=False)
        self.fc1 = nn.Linear(len(kernel_size) * kernel_num, class_num)
        # Linear(in_features=48, out_features=5, bias=True): maps (128, 48) --> (128, 5)
        # Parameters:
        #   in_features  - size of each input sample
        #   out_features - size of each output sample
        #   bias         - if set to False, the layer will not learn a bias. Default: True
        # Shape:
        #   input:  (N, in_features)
        #   output: (N, out_features)
        # Variables:
        #   weight - learnable weights of the module, of shape (out_features x in_features)
        #   bias   - learnable bias of the module, of shape (out_features)
    def init_embed(self, embed_matrix):
        self.embed.weight = nn.Parameter(torch.Tensor(embed_matrix))  # load a pre-trained embedding matrix (assumed shape: vocab_size x embed_dim)
    @staticmethod
    def conv_and_pool(x, conv):
        # x: (batch, embed_dim, sentence_length) torch.Size([128, 60, 20])
        x = conv(x)
        # after the 1-D convolution: torch.Size([128, 16, 18])
        x = F.relu(x)
        # after the activation: torch.Size([128, 16, 18])
        x = F.max_pool1d(x, x.size(2))  # (128, 16, 1); F.max_pool1d(input [128, 16, 18], kernel_size=18), where x.size(2) is the conv output length
        x = x.squeeze(2)
        # (batch, kernel_num) torch.Size([128, 16]); (128, 16, 1).squeeze(2) ==> (128, 16)
        return x
    def forward(self, x):
        # x: (batch, sentence_length) (128, 20)
        x = self.embed(x)
        # x: (batch, sentence_length, embed_dim), after the embedding layer: (128, 20, 60)
        # TODO init embed matrix with pre-trained
        x = x.permute(0, 2, 1)  # reorder (128, 20, 60) to (128, 60, 20)
        x1 = self.conv_and_pool(x, self.conv11)  # (batch, kernel_num) torch.Size([128, 16])
        x2 = self.conv_and_pool(x, self.conv12)  # (batch, kernel_num) torch.Size([128, 16])
        x3 = self.conv_and_pool(x, self.conv13)  # (batch, kernel_num)
        x = torch.cat((x1, x2, x3), 1)  # (batch, 3 * kernel_num) = (128, 48); torch.cat(tensors, dim=1)
        x = self.dropout(x)  # torch.Size([128, 48])
        logit = F.log_softmax(self.fc1(x), dim=1)
        return logit
    def init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                init.xavier_normal_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
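A minimal usage sketch for this model; the param values follow the comments in __init__, and the batch is random dummy indices rather than real tokenized sentences:

param = {
    'kernel_num': 16,
    'kernel_size': [3, 4, 5],
    'vocab_size': 22906,
    'embed_dim': 60,
    'dropout': 0.5,
    'class_num': 5,
}
model = textCNN(param)
dummy_batch = torch.randint(0, param['vocab_size'], (128, 20))  # (batch, sentence_length)
log_probs = model(dummy_batch)
print(log_probs.shape)  # torch.Size([128, 5]), one log-probability per class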
Model implemented with Conv2d: model_oo.py
Dimension changes
embedding: (batch_size, q_length sentence length) --> (batch_size, q_length, embedding_size word-vector dimension)
(128, 20) --> (128, 20, 60)
unsqueeze(): add a size-1 dimension so Conv2d can be applied
(batch_size, q_length, embedding_size) --> (batch_size, 1, q_length, embedding_size)
(128, 20, 60) --> (128, 1, 20, 60)
conv2d: taking kernel_size = 3 as an example, (batch_size, 1, q_length, embedding_size) --> (batch_size, kernel_num, conv2d_out, 1); (128, 1, 20, 60) --> (128, 16, 18, 1), with kernel_num = 16 and conv2d_out = 20 - 3 + 1 = 18
PS: kernel_num is the number of output channels of the convolution
squeeze(3): remove the size-1 dimension
(batch_size, kernel_num, conv2d_out, 1) --> (batch_size, kernel_num, conv2d_out)
(128, 16, 18, 1) --> (128, 16, 18)
relu: the activation does not change the shape
max_pool1d: (batch_size, kernel_num, conv2d_out) --> (batch_size, kernel_num, 1)
(128, 16, 18) --> (128, 16, 1)
squeeze(2): remove the size-1 dimension
(batch_size, kernel_num, 1) --> (batch_size, kernel_num)
(128, 16, 1) --> (128, 16)
cat(): concatenate the pooled vectors produced by the different kernel_sizes
PS: for every kernel_size the steps above yield a vector of shape (batch_size, kernel_num), i.e. (128, 16)
(batch_size, kernel_num) --> (batch_size, kernel_num * number of kernel_sizes)
(128, 16) --> (128, 16*3)
linear: (batch_size, kernel_num * number of kernel_sizes) --> (batch_size, class_num number of classes)
(128, 16*3) --> (128, 5)
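The same kind of shape check for the kernel_size = 3 branch of the Conv2d variant; again, the dummy tensor and the standalone layer are illustrative only:

import torch
import torch.nn as nn
import torch.nn.functional as F

batch_size, q_length, embed_dim, kernel_num = 128, 20, 60, 16
x = torch.randn(batch_size, q_length, embed_dim)    # stand-in for the embedding output
x = x.unsqueeze(1)                                   # (128, 1, 20, 60)
conv = nn.Conv2d(1, kernel_num, (3, embed_dim))      # kernel width spans the word-vector dimension
x = F.relu(conv(x).squeeze(3))                       # (128, 16, 18, 1) --> (128, 16, 18)
x = F.max_pool1d(x, x.size(2)).squeeze(2)            # (128, 16)
print(x.shape)                                       # torch.Size([128, 16])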
import torch
import torch.nn as nn
from torch.nn import functional as F
import math
class textCNN(nn.Module):
    def __init__(self, param):
        super(textCNN, self).__init__()
        ci = 1  # input channel size
        kernel_num = param['kernel_num']    # output channel size, 16
        kernel_size = param['kernel_size']  # [3, 4, 5]
        vocab_size = param['vocab_size']    # 22906
        embed_dim = param['embed_dim']      # 60
        dropout = param['dropout']          # 0.5
        class_num = param['class_num']      # 5
        self.param = param
        self.embed = nn.Embedding(vocab_size, embed_dim)
        # nn.Conv2d(in_channels, out_channels, (kernel_height, kernel_width));
        # the kernel width equals the word-vector dimension (60)
        self.conv11 = nn.Conv2d(ci, kernel_num, (kernel_size[0], embed_dim))
        self.conv12 = nn.Conv2d(ci, kernel_num, (kernel_size[1], embed_dim))
        self.conv13 = nn.Conv2d(ci, kernel_num, (kernel_size[2], embed_dim))
        self.dropout = nn.Dropout(dropout)  # Dropout(p=0.5, inplace=False)
        self.fc1 = nn.Linear(len(kernel_size) * kernel_num, class_num)
        # Linear(in_features=48, out_features=5, bias=True): maps (128, 48) --> (128, 5)
        # Parameters:
        #   in_features  - size of each input sample
        #   out_features - size of each output sample
        #   bias         - if set to False, the layer will not learn a bias. Default: True
        # Shape:
        #   input:  (N, in_features)
        #   output: (N, out_features)
        # Variables:
        #   weight - learnable weights of the module, of shape (out_features x in_features)
        #   bias   - learnable bias of the module, of shape (out_features)
    def init_embed(self, embed_matrix):
        self.embed.weight = nn.Parameter(torch.Tensor(embed_matrix))  # load a pre-trained embedding matrix (assumed shape: vocab_size x embed_dim)
    @staticmethod
    def conv_and_pool(x, conv):
        # x: (batch, 1, sentence_length, embed_dim) torch.Size([128, 1, 20, 60])
        x = conv(x)  # x: (batch, kernel_num, H_out, 1), after the 2-D convolution: torch.Size([128, 16, 18, 1]); 18 = 20 - 3 + 1 = q_length - kernel_size + 1
        x = F.relu(x.squeeze(3))  # a.squeeze(n) removes dimension n if its size is 1; x: (batch, kernel_num, H_out), after the activation: torch.Size([128, 16, 18])
        x = F.max_pool1d(x, x.size(2)).squeeze(2)  # (batch, kernel_num) torch.Size([128, 16]); (128, 16, 1).squeeze(2) ==> (128, 16)
        # F.max_pool1d(input [128, 16, 18], kernel_size=18), where x.size(2) is H_out
        return x
    def forward(self, x):
        # x: (batch, sentence_length) (128, 20)
        x = self.embed(x)  # x: (batch, sentence_length, embed_dim), after the embedding layer: (128, 20, 60)
        x = x.unsqueeze(1)  # x: (batch, 1, sentence_length, embed_dim) (128, 1, 20, 60); insert a size-1 dimension at dim 1 so x matches Conv2d's expected input (the 1 is the input channel count ci)
        x1 = self.conv_and_pool(x, self.conv11)  # (batch, kernel_num) torch.Size([128, 16])
        x2 = self.conv_and_pool(x, self.conv12)  # (batch, kernel_num) torch.Size([128, 16])
        x3 = self.conv_and_pool(x, self.conv13)  # (batch, kernel_num)
        x = torch.cat((x1, x2, x3), 1)  # (batch, 3 * kernel_num) = (128, 48); torch.cat(tensors, dim=1)
        x = self.dropout(x)  # torch.Size([128, 48])
        logit = F.log_softmax(self.fc1(x), dim=1)
        return logit
    def init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
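Because both models return log-probabilities via log_softmax, nn.NLLLoss is the matching criterion. Below is a minimal training-step sketch; the param dict, the Adam optimizer, the learning rate, and the random batch are assumptions for illustration, not part of the original code:

import torch
import torch.nn as nn

param = {'kernel_num': 16, 'kernel_size': [3, 4, 5], 'vocab_size': 22906,
         'embed_dim': 60, 'dropout': 0.5, 'class_num': 5}
model = textCNN(param)
criterion = nn.NLLLoss()                                     # pairs with log_softmax outputs
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

inputs = torch.randint(0, param['vocab_size'], (128, 20))    # (batch, sentence_length)
labels = torch.randint(0, param['class_num'], (128,))        # one class index per sample

optimizer.zero_grad()
loss = criterion(model(inputs), labels)                      # forward pass: (128, 5) log-probs vs (128,) targets
loss.backward()
optimizer.step()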