    def count_word(self):  ## vocabulary building; the test split is not involved here
        for sen in self.sentence:
            sen = sen[0].split(' ')  ## split on spaces
            for word in sen:
                self.words.append(word.lower())
        self.words = list(set(self.words))  # deduplicate
        self.words = sorted(self.words)
        self.words_num = len(self.words)
        return self.words, self.words_num
    def word2id(self):  ### build the vocabulary: word -> id, with ids starting at 1 (0 is kept for <pad>)
        flag = 1
        for word in self.words:
            if flag <= self.words_num:
                self.dic[word] = flag
                flag += 1
        # print(self.dic)
        return self.dic
    def sen2id(self, sentence):  ### map each sentence to a list of word ids
        sentence = self.sen2sen(sentence)
        sen_to_id = []
        for sen in sentence:
            sen_ids = []
            for word in sen[0].split():
                sen_ids.append(self.dic[word])
            sen_to_id.append(sen_ids)
        return sen_to_id
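With the vocabulary in place, sen2id is a plain dictionary lookup per token. A toy illustration (hypothetical values, not from the actual data):

# e.g. with dic = {'i': 1, 'like': 2, 'cats': 3}
# sen2id([['i like cats']]) -> [[1, 2, 3]]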
def padded(sentence, pad_token):  # pad_token: the id of '<pad>' (0 here)
    max_len = len(sentence[0])
    for sen in sentence:  # find the longest sentence
        if max_len < len(sen):
            max_len = len(sen)
    for sen in sentence:  # right-pad every sentence to max_len
        for _ in range(max_len - len(sen)):
            sen.append(pad_token)
    return sentence
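A quick sanity check on a toy batch (illustrative values):

# padded([[1, 2, 3], [4]], 0) -> [[1, 2, 3], [4, 0, 0]]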
class ModelEmbeddings(nn.Module):
    def __init__(self, words_num, embed_size, pad_token):
        super(ModelEmbeddings, self).__init__()
        self.words_num = words_num
        self.embedding = nn.Embedding(words_num, embed_size, padding_idx=pad_token)
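Note that nn.Embedding's third argument is padding_idx: the vector at that index is initialized to zeros and never receives gradient updates, so the pad id 0 stays a zero vector. A minimal check (using only the class defined above):

emb = ModelEmbeddings(5, 4, 0)
print(emb.embedding(torch.LongTensor([[0, 3]])))  # the first row is all zeros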
class textCNN(nn.Module):
    def __init__(self, words_num, embed_size, class_num, dropout_rate=0.1):
        super(textCNN, self).__init__()
        self.words_num = words_num
        self.embed_size = embed_size
        self.class_num = class_num
        # three conv branches with kernel heights 2/3/4; with max_len = 6 the
        # conv outputs have lengths 5/4/3, matching the pooling windows below
        self.conv1 = nn.Conv2d(1, 3, (2, embed_size))
        self.conv2 = nn.Conv2d(1, 3, (3, embed_size))
        self.conv3 = nn.Conv2d(1, 3, (4, embed_size))
        self.max_pool1 = nn.MaxPool1d(5)
        self.max_pool2 = nn.MaxPool1d(4)
        self.max_pool3 = nn.MaxPool1d(3)
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(3*3*1, class_num)
        # 3 -> out_channels, 3 -> number of kernel sizes (branches), 1 -> length after max pooling
    def forward(self, sen_embed):  # sen_embed: (batch, max_len, embed_size)
        sen_embed = sen_embed.unsqueeze(1)  # (batch, in_channels=1, max_len, embed_size)
        conv1 = F.relu(self.conv1(sen_embed))  # -> (batch, out_channels, max_len-1, 1)
        conv2 = F.relu(self.conv2(sen_embed))  # -> (batch, out_channels, max_len-2, 1)
        conv3 = F.relu(self.conv3(sen_embed))  # -> (batch, out_channels, max_len-3, 1)
        conv1 = torch.squeeze(conv1, dim=3)  # drop the trailing width-1 dimension
        conv2 = torch.squeeze(conv2, dim=3)
        conv3 = torch.squeeze(conv3, dim=3)
        x1 = self.max_pool1(conv1)  # each branch pools down to (batch, out_channels, 1)
        x2 = self.max_pool2(conv2)
        x3 = self.max_pool3(conv3)
        x = torch.cat((x1, x2), dim=1)
        x = torch.cat((x, x3), dim=1).squeeze(dim=2)  # (batch, 3 * out_channels)
        output = self.linear(self.dropout(x))
        return output
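A quick shape check on random data, assuming the max_len = 6 setup described above (batch of 2, embed_size 10; the numbers are illustrative):

dummy = torch.randn(2, 6, 10)
print(textCNN(100, 10, 2)(dummy).size())  # torch.Size([2, 2])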
def train(model, sentence, label):
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()
    print("-"*80)
    print('')
    for epoch in range(1, 2):  # a single pass over the training data
        for step, x in enumerate(torch.split(sentence, 1, dim=0)):  # batch size 1
            target = torch.tensor([label[step]], dtype=torch.long)
            optimizer.zero_grad()
            output = model(x)
            loss = criterion(output, target)
            # retain_graph=True because the embedding output is computed once
            # outside this function and its graph is reused on every step
            loss.backward(retain_graph=True)
            optimizer.step()
            if step % 2 == 0:
                result = torch.max(output, 1)[1].view(target.size())
                corrects = (result.data == target.data).sum()
                accuracy = corrects * 100.0 / 1  #### 1 is the batch size
                print('Epoch:', epoch, 'step:', step, '- loss: %.6f' % loss.data.item(),
                      'acc: %.4f' % accuracy)
    return model
if __name__ == '__main__':
    test = sentence2id(sentence)
    test.sen2sen(sentence)
    word, words_num = test.count_word()
    test.word2id()
    sen_train = test.sen2id(sentence)
    sen_test = test.sen2id(test_sentence)
    X_train = torch.LongTensor(padded(sen_train, 0))
    X_test = torch.LongTensor(padded(sen_test, 0))
    embedding = ModelEmbeddings(words_num + 1, 10, 0)  # +1 so id 0 is free for <pad>
    X_train_embed = embedding.embedding(X_train)
    X_test_embed = embedding.embedding(X_test)
    print(X_train_embed.size())
    # print(X_test_embed.size())
    ## TextCNN
    textcnn = textCNN(words_num, 10, 2)
    model = train(textcnn, X_train_embed, Label)
    print(torch.max(model(X_test_embed), 1)[1])
When building the convolutional layers, nn.ModuleList() could be used instead; since I am not yet comfortable with it, I simply wrote the three branches out one by one. I will come back and fix this once I have learned it properly.
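For reference, a minimal sketch of how the three branches might look with nn.ModuleList, keeping the hyperparameters used above (out_channels 3, kernel heights 2/3/4); the global max pooling via F.max_pool1d is my own substitution for the three fixed MaxPool1d layers, which also frees the class from assuming max_len = 6:

class textCNNList(nn.Module):
    def __init__(self, embed_size, class_num, dropout_rate=0.1):
        super(textCNNList, self).__init__()
        # one Conv2d per kernel height, collected in a ModuleList
        self.convs = nn.ModuleList(
            [nn.Conv2d(1, 3, (k, embed_size)) for k in (2, 3, 4)])
        self.dropout = nn.Dropout(dropout_rate)
        self.linear = nn.Linear(3 * 3, class_num)  # 3 branches * 3 channels

    def forward(self, sen_embed):       # (batch, max_len, embed_size)
        x = sen_embed.unsqueeze(1)      # (batch, 1, max_len, embed_size)
        feats = [F.relu(conv(x)).squeeze(3) for conv in self.convs]
        # pool each branch over its full remaining length -> (batch, 3)
        feats = [F.max_pool1d(f, f.size(2)).squeeze(2) for f in feats]
        out = torch.cat(feats, dim=1)   # (batch, 9)
        return self.linear(self.dropout(out))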