def dataloader(file1, file2, file3):
    """Load a knowledge-graph dataset for TransE training.

    Args:
        file1: path to the triple file, one "head\\trelation\\ttail" per line.
        file2: path to the entity2id file, one "entity\\tid" per line.
        file3: path to the relation2id file, one "relation\\tid" per line.

    Returns:
        (entity, relation, triple_list): `entity` and `relation` are lists of
        id strings; `triple_list` is a list of [head_id, rel_id, tail_id].
    """
    import codecs  # the file's top-level imports were lost; keep the block self-contained

    # Name -> id-string maps used only to translate the triple file below.
    entities2id = {}
    relations2id = {}
    entity = []
    relation = []
    with open(file2, 'r') as f1, open(file3, 'r') as f2:
        for line in f1.readlines():
            parts = line.strip().split('\t')
            if len(parts) != 2:
                continue  # skip malformed/blank lines
            entities2id[parts[0]] = parts[1]
            entity.append(parts[1])
        for line in f2.readlines():
            parts = line.strip().split('\t')
            if len(parts) != 2:
                continue
            relations2id[parts[0]] = parts[1]
            relation.append(parts[1])

    triple_list = []
    with codecs.open(file1, 'r') as f:
        for line in f.readlines():
            triple = line.strip().split("\t")
            if len(triple) != 3:
                continue
            # Map raw names to their numeric-id strings.
            h_ = entities2id[triple[0]]
            r_ = relations2id[triple[1]]
            t_ = entities2id[triple[2]]
            triple_list.append([h_, r_, t_])

    print("Complete load. entity : %d , relation : %d , triple : %d" % (
        len(entity), len(relation), len(triple_list)))
    return entity, relation, triple_list
def norm_l1(h, r, t):
    """Return the L1 distance ||h + r - t||_1, the TransE score of (h, r, t)."""
    import numpy as np  # the file's top-level imports were lost; import locally
    return np.sum(np.fabs(h + r - t))
def norm_l2(h, r, t):
    """Return the *squared* L2 distance ||h + r - t||_2^2.

    NOTE: despite the name this is the squared norm (no sqrt); the gradient
    2*(h + r - t) used in update_triple_embedding matches this form.
    """
    import numpy as np  # the file's top-level imports were lost; import locally
    return np.sum(np.square(h + r - t))
class TransE:
    """Plain-numpy implementation of TransE (Bordes et al., NeurIPS 2013).

    Entities and relations are embedded in the same R^dim space; a true
    triple (h, r, t) should satisfy h + r ~= t, so it is scored by the
    distance ||h + r - t||.  Training minimises a margin-based ranking loss
    between each true triple and a randomly corrupted one, via SGD.
    """

    def __init__(self, entity, relation, triple_list, embedding_dim=50, lr=0.01, margin=1.0, norm=1):
        # Before data_initialise() these hold the raw id lists; afterwards
        # they are replaced by {id: embedding-vector} dicts.
        self.entities = entity
        self.relations = relation
        self.triples = triple_list
        self.dimension = embedding_dim
        self.learning_rate = lr
        self.margin = margin
        self.norm = norm  # 1 -> L1 distance, anything else -> squared L2
        self.loss = 0.0

    def data_initialise(self):
        """Draw initial embeddings uniformly from [-6/sqrt(dim), 6/sqrt(dim)]."""
        import numpy as np  # module-level imports were lost from this file

        entityVectorList = {}
        relationVectorList = {}
        bound = 6.0 / np.sqrt(self.dimension)
        for entity in self.entities:
            entityVectorList[entity] = np.random.uniform(-bound, bound, self.dimension)
        for relation in self.relations:
            relation_vector = np.random.uniform(-bound, bound, self.dimension)
            # Relations are normalised once at init (per the paper); entity
            # vectors are instead re-normalised at the start of every epoch.
            relationVectorList[relation] = self.normalization(relation_vector)
        # Switch the id lists over to id -> vector dictionaries.
        self.entities = entityVectorList
        self.relations = relationVectorList

    # Backward-compatible alias: earlier revisions exposed the method under
    # this truncated name.
    data_initiali = data_initialise

    def normalization(self, vector):
        """Return `vector` scaled to unit Euclidean length."""
        import numpy as np
        return vector / np.linalg.norm(vector)

    def training_run(self, epochs=1, nbatches=100, out_file_title=''):
        """Run SGD training and dump the final embeddings to text files.

        Args:
            epochs: number of training epochs.
            nbatches: number of minibatches sampled per epoch.
            out_file_title: prefix for the two output embedding files.
        """
        import codecs
        import copy
        import random
        import time
        import numpy as np

        batch_size = int(len(self.triples) / nbatches)
        print("batch size: ", batch_size)
        for epoch in range(epochs):
            start = time.time()
            self.loss = 0.0
            # Normalise the entity embeddings to length 1 at the start of
            # each epoch, as prescribed by the TransE algorithm.
            for entity in self.entities.keys():
                self.entities[entity] = self.normalization(self.entities[entity])

            for batch in range(nbatches):
                batch_samples = random.sample(self.triples, batch_size)
                Tbatch = []
                # random.sample requires a sequence (dict views rejected on
                # Python >= 3.11), so snapshot the keys once per batch.
                entity_ids = list(self.entities.keys())
                for sample in batch_samples:
                    corrupted_sample = copy.deepcopy(sample)
                    pr = np.random.random(1)[0]
                    if pr > 0.5:
                        # Corrupt the head entity; resample until it differs.
                        corrupted_sample[0] = random.sample(entity_ids, 1)[0]
                        while corrupted_sample[0] == sample[0]:
                            corrupted_sample[0] = random.sample(entity_ids, 1)[0]
                    else:
                        # Corrupt the tail entity; resample until it differs.
                        corrupted_sample[2] = random.sample(entity_ids, 1)[0]
                        while corrupted_sample[2] == sample[2]:
                            corrupted_sample[2] = random.sample(entity_ids, 1)[0]
                    if (sample, corrupted_sample) not in Tbatch:
                        Tbatch.append((sample, corrupted_sample))
                self.update_triple_embedding(Tbatch)
            end = time.time()
            print("epoch: ", epoch, "cost time: %s" % (round((end - start), 3)))
            print("running loss: ", self.loss)

        with codecs.open(out_file_title + "TransE_entity_" + str(self.dimension) + "dim_batch" + str(batch_size), "w") as f1:
            for e in self.entities.keys():
                f1.write(str(list(self.entities[e])))
                f1.write("\n")
        with codecs.open(out_file_title + "TransE_relation_" + str(self.dimension) + "dim_batch" + str(batch_size), "w") as f2:
            for r in self.relations.keys():
                f2.write(str(list(self.relations[r])))
                f2.write("\n")

    def update_triple_embedding(self, Tbatch):
        """One SGD step over a batch of (correct, corrupted) triple pairs."""
        import copy
        # deepcopy guarantees distinct storage at every nesting level, so
        # writes below go to fresh arrays while the reads further down still
        # see the embeddings exactly as they were at the start of the batch.
        copy_entity = copy.deepcopy(self.entities)
        copy_relation = copy.deepcopy(self.relations)

        for correct_sample, corrupted_sample in Tbatch:
            # Writable copies that accumulate this pair's update.
            correct_copy_head = copy_entity[correct_sample[0]]
            correct_copy_tail = copy_entity[correct_sample[2]]
            relation_copy = copy_relation[correct_sample[1]]
            corrupted_copy_head = copy_entity[corrupted_sample[0]]
            corrupted_copy_tail = copy_entity[corrupted_sample[2]]

            # Read-only views of the batch-start embeddings.
            correct_head = self.entities[correct_sample[0]]
            correct_tail = self.entities[correct_sample[2]]
            relation = self.relations[correct_sample[1]]
            corrupted_head = self.entities[corrupted_sample[0]]
            corrupted_tail = self.entities[corrupted_sample[2]]

            # Distance (score) of the true and the corrupted triple.
            if self.norm == 1:
                correct_distance = norm_l1(correct_head, relation, correct_tail)
                corrupted_distance = norm_l1(corrupted_head, relation, corrupted_tail)
            else:
                correct_distance = norm_l2(correct_head, relation, correct_tail)
                corrupted_distance = norm_l2(corrupted_head, relation, corrupted_tail)

            # Margin ranking loss: only violated pairs produce a gradient.
            loss = self.margin + correct_distance - corrupted_distance
            if loss > 0:
                self.loss += loss
                correct_gradient = 2 * (correct_head + relation - correct_tail)
                corrupted_gradient = 2 * (corrupted_head + relation - corrupted_tail)
                if self.norm == 1:
                    # For the L1 distance the gradient is the sign vector.
                    for i in range(len(correct_gradient)):
                        correct_gradient[i] = 1 if correct_gradient[i] > 0 else -1
                        corrupted_gradient[i] = 1 if corrupted_gradient[i] > 0 else -1

                # Descend on the true triple, ascend on the corrupted one.
                correct_copy_head -= self.learning_rate * correct_gradient
                relation_copy -= self.learning_rate * correct_gradient
                correct_copy_tail -= -1 * self.learning_rate * correct_gradient
                relation_copy -= -1 * self.learning_rate * corrupted_gradient

                if correct_sample[0] == corrupted_sample[0]:
                    # The tail was corrupted, so the shared head entity's
                    # embedding must also take the corrupted-triple gradient.
                    correct_copy_head -= -1 * self.learning_rate * corrupted_gradient
                    corrupted_copy_tail -= self.learning_rate * corrupted_gradient
                elif correct_sample[2] == corrupted_sample[2]:
                    # The head was corrupted: symmetric case for the shared tail.
                    corrupted_copy_head -= -1 * self.learning_rate * corrupted_gradient
                    correct_copy_tail -= self.learning_rate * corrupted_gradient

                # Re-normalise only the entity vectors touched by this update,
                # instead of re-normalising every embedding.
                copy_entity[correct_sample[0]] = self.normalization(correct_copy_head)
                copy_entity[correct_sample[2]] = self.normalization(correct_copy_tail)
                if correct_sample[0] == corrupted_sample[0]:
                    copy_entity[corrupted_sample[2]] = self.normalization(corrupted_copy_tail)
                elif correct_sample[2] == corrupted_sample[2]:
                    copy_entity[corrupted_sample[0]] = self.normalization(corrupted_copy_head)
                # The paper notes relation embeddings need not be normalised.
                copy_relation[correct_sample[1]] = relation_copy
                # copy_relation[correct_sample[1]] = self.normalization(relation_copy)

        # Commit the batch update.
        self.entities = copy_entity
        self.relations = copy_relation
if __name__ == '__main__':
    # Placeholder paths: point these at the triple / entity2id / relation2id
    # files of your dataset before running.
    file1 = "/"
    file2 = "/"
    file3 = "/"
    entity_t, relation_t, triple_list = dataloader(file1, file2, file3)

    # modify by yourself
    transE = TransE(entity_t, relation_t, triple_list, embedding_dim=30, lr=0.01, margin=1.0, norm=2)
    transE.data_initialise()