TextCNN (Text Classification)


The TextCNN network structure is shown below: an embedding layer, several parallel convolutions with different kernel heights, max-pooling over each feature map, concatenation, and a fully connected classifier.

[Figure: TextCNN network structure]
The basic workflow for text classification with TextCNN (taking sentence classification as an example):
(1) Split each sentence into words and build a vocabulary from them
(2) Convert the words to vectors (word2vec, GloVe, BERT, nn.Embedding)
(3) Zero-pad the sentences so they all have the same length
(4) Build the TextCNN model, train it, and test it
Below is an example of TextCNN following this workflow. A few caveats:
1. The predictions are not very good, because there are so few sentences.
2. No sophisticated word2vec model is used.
3. With so few sentences, there is no eval function.
import torch
import torch.nn as nn
import torch.nn.functional as F
sentence = [['The movie is great'], ['Tom has a headache today'],
            ['I think the apple is bad'], ['You are so beautiful']]
Label = [1, 0, 0, 1]
test_sentence = ['The apple is great']
class sentence2id:
    def __init__(self, sentence):
        self.sentence = sentence
        self.dic = {}
        self.words = []
        self.words_num = 0

    def sen2sen(self, sentence):  ## uppercase -> lowercase
        sen = []
        if type(sentence[0]) == list:
            for s in sentence:
                sen.append([s[0].lower()])
        else:  # a flat list: wrap it, then recurse to lowercase
            sen.append(sentence)
            sen = self.sen2sen(sen)
        return sen

    def countword(self):  ## count the distinct words
        ### [the vocabulary is built from the training sentences only, not the test set] ###
        for s in self.sentence:
            for word in s[0].split(' '):  ## split on spaces
                self.words.append(word.lower())
        self.words = sorted(list(set(self.words)))
        self.words_num = len(self.words)
        return self.words, self.words_num

    def word2id(self):  ### build the vocabulary: word -> id, ids start at 1 (0 is the pad)
        flag = 1
        for word in self.words:
            if flag <= self.words_num:
                self.dic[word] = flag
                flag += 1
        #print(self.dic)
        return self.dic

    def sen2id(self, sentence):  ### sentence -> list of word ids
        sentence = self.sen2sen(sentence)
        sentoid = []
        for s in sentence:
            sen = []
            for word in s[0].split():
                sen.append(self.dic[word])
            sentoid.append(sen)
        return sentoid
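
A quick sanity check of sentence2id on the toy corpus above (my addition, not in the original post; the ids simply follow sorted vocabulary order):

vocab = sentence2id(sentence)
words, words_num = vocab.countword()
vocab.word2id()
print(words_num)                               # 17 distinct words
print(vocab.sen2id([['The movie is great']]))  # [[13, 11, 10, 6]]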
def padded(sentence, pad_token, max_len=None):  # pad_token: the id of '<pad>', here 0
    # pad every sentence to max_len; when max_len is not given, use the
    # longest sentence in this batch (the test batch must reuse the
    # training max_len, see the main block below)
    if max_len is None:
        max_len = max(len(s) for s in sentence)
    for s in sentence:
        for _ in range(max_len - len(s)):
            s.append(pad_token)
    return sentence
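
For example (my addition; the max_len keyword is the parameter added above):

print(padded([[1, 2], [3, 4, 5]], 0))  # [[1, 2, 0], [3, 4, 5]]
print(padded([[1, 2]], 0, max_len=4))  # [[1, 2, 0, 0]]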
class ModelEmbeddings(nn.Module):
    def __init__(self, words_num, embed_size, pad_token):
        super(ModelEmbeddings, self).__init__()
        self.words_num = words_num
        self.Embedding = nn.Embedding(words_num, embed_size, padding_idx=pad_token)
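
A quick shape check (my addition; the numbers are just illustrative):

emb = ModelEmbeddings(words_num=18, embed_size=10, pad_token=0)
ids = torch.LongTensor([[1, 2, 0]])  # one sentence of length 3, 0 = pad
print(emb.Embedding(ids).size())     # torch.Size([1, 3, 10])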
class textCNN(nn.Module):
    def __init__(self, words_num, embed_size, class_num, dropout_rate=0.1):
        super(textCNN, self).__init__()
        self.words_num = words_num
        self.embed_size = embed_size
        self.class_num = class_num
        # kernel heights 2/3/4; with max_len = 6 the conv outputs have
        # lengths 5/4/3, which the pooling layers below reduce to 1
        self.conv1 = nn.Conv2d(1, 3, (2, embed_size))
        self.conv2 = nn.Conv2d(1, 3, (3, embed_size))
        self.conv3 = nn.Conv2d(1, 3, (4, embed_size))
        self.max_pool1 = nn.MaxPool1d(5)
        self.max_pool2 = nn.MaxPool1d(4)
        self.max_pool3 = nn.MaxPool1d(3)
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(3*3*1, class_num)
        # 3 -> out_channels, 3 -> number of kernel sizes, 1 -> max_pool output length

    def forward(self, sen_embed):              # (batch, max_len, embed_size)
        sen_embed = sen_embed.unsqueeze(1)     # (batch, in_channels=1, max_len, embed_size)
        conv1 = F.relu(self.conv1(sen_embed))  # (batch, out_channels, max_len-1, 1)
        conv2 = F.relu(self.conv2(sen_embed))
        conv3 = F.relu(self.conv3(sen_embed))
        conv1 = torch.squeeze(conv1, dim=3)
        conv2 = torch.squeeze(conv2, dim=3)
        conv3 = torch.squeeze(conv3, dim=3)
        x1 = self.max_pool1(conv1)             # (batch, out_channels, 1)
        x2 = self.max_pool2(conv2)
        x3 = self.max_pool3(conv3)
        x = torch.cat((x1, x2), dim=1)
        x = torch.cat((x, x3), dim=1).squeeze(dim=2)  # (batch, 3*out_channels)
        output = self.linear(self.dropout(x))
        return output
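
A minimal shape trace of the forward pass (my addition; it assumes max_len = 6 and embed_size = 10, which is what the toy data below produces):

net = textCNN(words_num=17, embed_size=10, class_num=2)
dummy = torch.randn(1, 6, 10)  # (batch, max_len, embed_size)
print(net(dummy).size())       # torch.Size([1, 2])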
def train(model, sentence, label):
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()
    print("-" * 80)
    for epoch in range(1, 2):  ## a single epoch is enough for this toy example
        for step, x in enumerate(torch.split(sentence, 1, dim=0)):  # batch size 1
            target = torch.tensor([label[step]], dtype=torch.long)
            optimizer.zero_grad()
            output = model(x)
            loss = criterion(output, target)
            # the embeddings are computed once outside this function and their
            # graph is shared by every slice, so it must be retained across steps
            loss.backward(retain_graph=True)
            optimizer.step()
            if step % 2 == 0:
                result = torch.max(output, 1)[1].view(target.size())
                corrects = (result.data == target.data).sum()
                accuracy = corrects * 100.0 / 1  #### 1 is the batch size
                print('Epoch:', epoch, 'step:', step,
                      '- loss: %.6f' % loss.data.item(),
                      'acc: %.4f' % accuracy)
    return model
if __name__ == '__main__':
    ## build the vocabulary and convert sentences to id sequences
    test = sentence2id(sentence)
    word, words_num = test.countword()
    test.word2id()
    sen_train = test.sen2id(sentence)
    sen_test = test.sen2id(test_sentence)
    X_train = torch.LongTensor(padded(sen_train, 0))
    # pad the test batch to the training max_len, otherwise the fixed
    # pooling sizes in textCNN would not fit
    X_test = torch.LongTensor(padded(sen_test, 0, max_len=X_train.size(1)))
    ## embed: words_num + 1 because ids start at 1 and 0 is the pad index
    Embedding = ModelEmbeddings(words_num + 1, 10, 0)
    X_train_embed = Embedding.Embedding(X_train)
    X_test_embed = Embedding.Embedding(X_test)
    print(X_train_embed.size())
    #print(X_test_embed.size())
    ## TextCNN
    textcnn = textCNN(words_num, 10, 2)
    model = train(textcnn, X_train_embed, Label)
    print(torch.max(model(X_test_embed), 1)[1])
When building the convolution layers, nn.ModuleList() could have been used; since I'm not yet comfortable with it, I wrote the three layers out by hand. I'll fill this in properly once I've learned it, but a rough sketch follows below.
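A minimal sketch of that nn.ModuleList variant (my addition, not the original author's code; it keeps out_channels = 3 and kernel heights 2/3/4, but pools each feature map down to length 1 whatever max_len is, so the fixed MaxPool1d sizes disappear):

class textCNN2(nn.Module):
    def __init__(self, embed_size, class_num, out_channels=3,
                 kernel_heights=(2, 3, 4), dropout_rate=0.1):
        super().__init__()
        self.convs = nn.ModuleList(
            [nn.Conv2d(1, out_channels, (k, embed_size)) for k in kernel_heights])
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(out_channels * len(kernel_heights), class_num)

    def forward(self, sen_embed):           # (batch, max_len, embed_size)
        x = sen_embed.unsqueeze(1)          # (batch, 1, max_len, embed_size)
        pooled = []
        for conv in self.convs:
            c = F.relu(conv(x)).squeeze(3)  # (batch, out_channels, L)
            pooled.append(F.max_pool1d(c, c.size(2)).squeeze(2))  # global max pool
        return self.linear(self.dropout(torch.cat(pooled, dim=1)))

textCNN2(10, 2) would then stand in for textCNN(words_num, 10, 2) in the main block above (it no longer needs words_num).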
