def dataloader(file1, file2, file3):
    """Load a knowledge-graph dataset for TransE training.

    Args:
        file1: path to the triple file, one "head\\trelation\\ttail" per line.
        file2: path to the entity2id file, one "entity\\tid" per line.
        file3: path to the relation2id file, one "relation\\tid" per line.

    Returns:
        (entity, relation, triple_list): `entity` and `relation` are lists of
        id strings; `triple_list` is a list of [head_id, rel_id, tail_id].
    """
    import codecs  # the file's top-level imports were lost; keep the block self-contained

    # Name -> id-string maps used only to translate the triple file below.
    entities2id = {}
    relations2id = {}
    entity = []
    relation = []
    with open(file2, 'r') as f1, open(file3, 'r') as f2:
        for line in f1.readlines():
            parts = line.strip().split('\t')
            if len(parts) != 2:
                continue  # skip malformed/blank lines
            entities2id[parts[0]] = parts[1]
            entity.append(parts[1])
        for line in f2.readlines():
            parts = line.strip().split('\t')
            if len(parts) != 2:
                continue
            relations2id[parts[0]] = parts[1]
            relation.append(parts[1])

    triple_list = []
    with codecs.open(file1, 'r') as f:
        for line in f.readlines():
            triple = line.strip().split("\t")
            if len(triple) != 3:
                continue
            # Map raw names to their numeric-id strings.
            h_ = entities2id[triple[0]]
            r_ = relations2id[triple[1]]
            t_ = entities2id[triple[2]]
            triple_list.append([h_, r_, t_])

    print("Complete load. entity : %d , relation : %d , triple : %d" % (
        len(entity), len(relation), len(triple_list)))
    return entity, relation, triple_list
def norm_l1(h, r, t):
    """Return the L1 distance ||h + r - t||_1, the TransE score of (h, r, t)."""
    import numpy as np  # the file's top-level imports were lost; import locally
    return np.sum(np.fabs(h + r - t))
def norm_l2(h, r, t):
    """Return the *squared* L2 distance ||h + r - t||_2^2.

    NOTE: despite the name this is the squared norm (no sqrt); the gradient
    2*(h + r - t) used in update_triple_embedding matches this form.
    """
    import numpy as np  # the file's top-level imports were lost; import locally
    return np.sum(np.square(h + r - t))
class TransE:
    """Plain-numpy implementation of TransE (Bordes et al., NeurIPS 2013).

    Entities and relations are embedded in the same R^dim space; a true
    triple (h, r, t) should satisfy h + r ~= t, so it is scored by the
    distance ||h + r - t||.  Training minimises a margin-based ranking loss
    between each true triple and a randomly corrupted one, via SGD.
    """

    def __init__(self, entity, relation, triple_list, embedding_dim=50, lr=0.01, margin=1.0, norm=1):
        # Before data_initialise() these hold the raw id lists; afterwards
        # they are replaced by {id: embedding-vector} dicts.
        self.entities = entity
        self.relations = relation
        self.triples = triple_list
        self.dimension = embedding_dim
        self.learning_rate = lr
        self.margin = margin
        self.norm = norm  # 1 -> L1 distance, anything else -> squared L2
        self.loss = 0.0

    def data_initialise(self):
        """Draw initial embeddings uniformly from [-6/sqrt(dim), 6/sqrt(dim)]."""
        import numpy as np  # module-level imports were lost from this file

        entityVectorList = {}
        relationVectorList = {}
        bound = 6.0 / np.sqrt(self.dimension)
        for entity in self.entities:
            entityVectorList[entity] = np.random.uniform(-bound, bound, self.dimension)
        for relation in self.relations:
            relation_vector = np.random.uniform(-bound, bound, self.dimension)
            # Relations are normalised once at init (per the paper); entity
            # vectors are instead re-normalised at the start of every epoch.
            relationVectorList[relation] = self.normalization(relation_vector)
        # Switch the id lists over to id -> vector dictionaries.
        self.entities = entityVectorList
        self.relations = relationVectorList

    # Backward-compatible alias: earlier revisions exposed the method under
    # this truncated name.
    data_initiali = data_initialise

    def normalization(self, vector):
        """Return `vector` scaled to unit Euclidean length."""
        import numpy as np
        return vector / np.linalg.norm(vector)

    def training_run(self, epochs=1, nbatches=100, out_file_title=''):
        """Run SGD training and dump the final embeddings to text files.

        Args:
            epochs: number of training epochs.
            nbatches: number of minibatches sampled per epoch.
            out_file_title: prefix for the two output embedding files.
        """
        import codecs
        import copy
        import random
        import time
        import numpy as np

        batch_size = int(len(self.triples) / nbatches)
        print("batch size: ", batch_size)
        for epoch in range(epochs):
            start = time.time()
            self.loss = 0.0
            # Normalise the entity embeddings to length 1 at the start of
            # each epoch, as prescribed by the TransE algorithm.
            for entity in self.entities.keys():
                self.entities[entity] = self.normalization(self.entities[entity])

            for batch in range(nbatches):
                batch_samples = random.sample(self.triples, batch_size)
                Tbatch = []
                # random.sample requires a sequence (dict views rejected on
                # Python >= 3.11), so snapshot the keys once per batch.
                entity_ids = list(self.entities.keys())
                for sample in batch_samples:
                    corrupted_sample = copy.deepcopy(sample)
                    pr = np.random.random(1)[0]
                    if pr > 0.5:
                        # Corrupt the head entity; resample until it differs.
                        corrupted_sample[0] = random.sample(entity_ids, 1)[0]
                        while corrupted_sample[0] == sample[0]:
                            corrupted_sample[0] = random.sample(entity_ids, 1)[0]
                    else:
                        # Corrupt the tail entity; resample until it differs.
                        corrupted_sample[2] = random.sample(entity_ids, 1)[0]
                        while corrupted_sample[2] == sample[2]:
                            corrupted_sample[2] = random.sample(entity_ids, 1)[0]
                    if (sample, corrupted_sample) not in Tbatch:
                        Tbatch.append((sample, corrupted_sample))
                self.update_triple_embedding(Tbatch)
            end = time.time()
            print("epoch: ", epoch, "cost time: %s" % (round((end - start), 3)))
            print("running loss: ", self.loss)

        with codecs.open(out_file_title + "TransE_entity_" + str(self.dimension) + "dim_batch" + str(batch_size), "w") as f1:
            for e in self.entities.keys():
                f1.write(str(list(self.entities[e])))
                f1.write("\n")
        with codecs.open(out_file_title + "TransE_relation_" + str(self.dimension) + "dim_batch" + str(batch_size), "w") as f2:
            for r in self.relations.keys():
                f2.write(str(list(self.relations[r])))
                f2.write("\n")

    def update_triple_embedding(self, Tbatch):
        """One SGD step over a batch of (correct, corrupted) triple pairs."""
        import copy
        # deepcopy guarantees distinct storage at every nesting level, so
        # writes below go to fresh arrays while the reads further down still
        # see the embeddings exactly as they were at the start of the batch.
        copy_entity = copy.deepcopy(self.entities)
        copy_relation = copy.deepcopy(self.relations)

        for correct_sample, corrupted_sample in Tbatch:
            # Writable copies that accumulate this pair's update.
            correct_copy_head = copy_entity[correct_sample[0]]
            correct_copy_tail = copy_entity[correct_sample[2]]
            relation_copy = copy_relation[correct_sample[1]]
            corrupted_copy_head = copy_entity[corrupted_sample[0]]
            corrupted_copy_tail = copy_entity[corrupted_sample[2]]

            # Read-only views of the batch-start embeddings.
            correct_head = self.entities[correct_sample[0]]
            correct_tail = self.entities[correct_sample[2]]
            relation = self.relations[correct_sample[1]]
            corrupted_head = self.entities[corrupted_sample[0]]
            corrupted_tail = self.entities[corrupted_sample[2]]

            # Distance (score) of the true and the corrupted triple.
            if self.norm == 1:
                correct_distance = norm_l1(correct_head, relation, correct_tail)
                corrupted_distance = norm_l1(corrupted_head, relation, corrupted_tail)
            else:
                correct_distance = norm_l2(correct_head, relation, correct_tail)
                corrupted_distance = norm_l2(corrupted_head, relation, corrupted_tail)

            # Margin ranking loss: only violated pairs produce a gradient.
            loss = self.margin + correct_distance - corrupted_distance
            if loss > 0:
                self.loss += loss
                correct_gradient = 2 * (correct_head + relation - correct_tail)
                corrupted_gradient = 2 * (corrupted_head + relation - corrupted_tail)
                if self.norm == 1:
                    # For the L1 distance the gradient is the sign vector.
                    for i in range(len(correct_gradient)):
                        correct_gradient[i] = 1 if correct_gradient[i] > 0 else -1
                        corrupted_gradient[i] = 1 if corrupted_gradient[i] > 0 else -1

                # Descend on the true triple, ascend on the corrupted one.
                correct_copy_head -= self.learning_rate * correct_gradient
                relation_copy -= self.learning_rate * correct_gradient
                correct_copy_tail -= -1 * self.learning_rate * correct_gradient
                relation_copy -= -1 * self.learning_rate * corrupted_gradient

                if correct_sample[0] == corrupted_sample[0]:
                    # The tail was corrupted, so the shared head entity's
                    # embedding must also take the corrupted-triple gradient.
                    correct_copy_head -= -1 * self.learning_rate * corrupted_gradient
                    corrupted_copy_tail -= self.learning_rate * corrupted_gradient
                elif correct_sample[2] == corrupted_sample[2]:
                    # The head was corrupted: symmetric case for the shared tail.
                    corrupted_copy_head -= -1 * self.learning_rate * corrupted_gradient
                    correct_copy_tail -= self.learning_rate * corrupted_gradient

                # Re-normalise only the entity vectors touched by this update,
                # instead of re-normalising every embedding.
                copy_entity[correct_sample[0]] = self.normalization(correct_copy_head)
                copy_entity[correct_sample[2]] = self.normalization(correct_copy_tail)
                if correct_sample[0] == corrupted_sample[0]:
                    copy_entity[corrupted_sample[2]] = self.normalization(corrupted_copy_tail)
                elif correct_sample[2] == corrupted_sample[2]:
                    copy_entity[corrupted_sample[0]] = self.normalization(corrupted_copy_head)
                # The paper notes relation embeddings need not be normalised.
                copy_relation[correct_sample[1]] = relation_copy
                # copy_relation[correct_sample[1]] = self.normalization(relation_copy)

        # Commit the batch update.
        self.entities = copy_entity
        self.relations = copy_relation
if __name__ == '__main__':
    # Placeholder paths: point these at the triple / entity2id / relation2id
    # files of your dataset before running.
    file1 = "/"
    file2 = "/"
    file3 = "/"
    entity_t, relation_t, triple_list = dataloader(file1, file2, file3)

    # modify by yourself
    transE = TransE(entity_t, relation_t, triple_list, embedding_dim=30, lr=0.01, margin=1.0, norm=2)
    transE.data_initialise()