信息检索的 PR 曲线(Precision-Recall Curve)作图
Notes 多模态检索中常用几种评价指标:Precision-Recall Curve
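引用 [1] 里介绍的 PR 曲线的例子是只针对一个 query sample 而言的,不好说代表性。照师兄的说法,现在有一种画法是:枚举 top-R 之 R,对每一个 R,都算一次 $y_i = \mathrm{mean}(\text{各 query sample 的 Precision@}R)$ 和 $x_i = \mathrm{mean}(\text{各 query sample 的 Recall@}R)$,然后以 $(x_i, y_i)$ 作图。即 P 对应平均 Precision@R,R 对应平均 Recall@R。他给的源程序是 matlab 代码,转写成 python 备份。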
Code python
函数主体类似计算 mAP@R 的过程,参照前作:
$y_i = \mathrm{mean}(\text{各 query sample 的「Precision@}R\text{」})$
$x_i = \mathrm{mean}(\text{各 query sample 的「Recall@}R\text{」})$
$(x_i, y_i)$
import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial.distance import cdist
# 画 P-R 曲线
def pr_curve(qF, rF, qL, rL, what=0, topK=-1, show=True):
    """Compute, and optionally plot, the query-averaged precision-recall curve.

    For every cut-off k = 1..topK, Precision@k and Recall@k are computed for
    each query and then averaged over all queries, giving one curve point
    (mean Recall@k, mean Precision@k) per k.

    A query and a database sample count as relevant to each other when the
    dot product of their label rows is positive. Queries that have no
    relevant database sample contribute zero precision and zero recall and
    are still included in the averages.

    Args:
        qF: Query features, one row per query sample.
        rF: Database features, one row per database sample.
        qL: Query labels, one row per query sample.
        rL: Database labels, one row per database sample.
        what: 0 ranks the database by cosine distance; any other value ranks
            it by Hamming distance.
        topK: Largest cut-off to evaluate (the position threshold used in
            mAP@K). ``-1``, or any value larger than the database size,
            means the whole database.
        show: When true (default), draw the curve with matplotlib and call
            ``plt.show()``. Pass ``False`` to only compute the curve.

    Returns:
        A tuple ``(P, R)`` of 1-D arrays of length ``topK``: the mean
        precision and the mean recall at each cut-off.
    """
    n_db = rF.shape[0]
    if topK == -1 or topK > n_db:  # upper bound for the K of top-K
        topK = n_db
    # Any other negative value yields an empty curve instead of being
    # interpreted as a "drop the last items" slice below.
    topK = max(topK, 0)

    # relevant[i, j] is True when query i and database item j share a label.
    relevant = np.dot(qL, rL.transpose()) > 0
    metric = 'cosine' if what == 0 else 'hamming'
    # Database indices for each query, nearest first.
    rank = np.argsort(cdist(qF, rF, metric))

    # hits[i, k-1] = number of relevant items among the top-k results of
    # query i; one cumulative sum replaces re-summing the prefix for every k.
    ranked_relevance = np.take_along_axis(relevant, rank[:, :topK], axis=1)
    hits = np.cumsum(ranked_relevance, axis=1, dtype=np.float64)

    cutoffs = np.arange(1, topK + 1)
    # Number of relevant items in the whole database, per query.
    n_relevant = relevant.sum(axis=1)

    precision = hits / cutoffs
    # A query without relevant items has hits == 0 everywhere, so dividing
    # by 1 instead of 0 keeps its recall at 0 without a division warning.
    recall = hits / np.maximum(n_relevant, 1)[:, np.newaxis]

    P = precision.mean(axis=0)
    R = recall.mean(axis=0)

    if show:
        plt.figure(figsize=(5, 5))
        # First argument is x (recall), second is y (precision). The label
        # gives plt.legend() an artist to list.
        plt.plot(R, P, label='P-R curve')
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        plt.xlabel('recall')
        plt.ylabel('precision')
        plt.legend()
        plt.show()

    return P, R
matlab
师兄给的 matlab 源码
function [map, recallA] = myPr(sim_x, L_tr, L_te, mark, Rs)
% MYPR  Query-averaged precision and recall at a series of top-R cut-offs.
%
%   [map, recallA] = myPr(sim_x, L_tr, L_te, mark)
%   [map, recallA] = myPr(sim_x, L_tr, L_te, mark, Rs)
%
%   sim_x : score matrix; column i holds the scores of every database
%           sample against query i (rows = database, columns = queries).
%   L_tr  : database labels, one row per database sample. More than one
%           column selects the multi-label branch, where relevance is
%           (L_tr * L_te') > 0; otherwise relevance is label equality.
%   L_te  : query labels, one row per query.
%   mark  : 0 -> sort scores in descending order (larger = closer),
%           1 -> sort scores in ascending order (smaller = closer).
%   Rs    : optional vector of cut-offs R; defaults to 10:100:2110.
%           Values larger than the database size are clamped to it.
%
%   map(j)     : mean over queries of (#relevant in top-R) / R
%   recallA(j) : mean over queries of (#relevant in top-R) / (#relevant in all)
%   Queries with no relevant sample in the top-R contribute 0 to both.

if nargin < 5 || isempty(Rs)
    Rs = 10:100:2110;
end

[n_db, n_query] = size(sim_x);      % database size, number of queries
multiLabel = size(L_tr, 2) > 1;
if multiLabel
    % Loop-invariant: computed once instead of once per cut-off.
    Label = L_tr * L_te';
end

% Rank the database for every query once; the order does not depend on R.
if mark == 0
    [~, order] = sort(sim_x, 1, 'descend');
elseif mark == 1
    [~, order] = sort(sim_x, 1);
else
    error('myPr:badMark', 'mark must be 0 (descending) or 1 (ascending).');
end

map = zeros(1, numel(Rs));
recallA = zeros(1, numel(Rs));
for j = 1:numel(Rs)
    % Never ask for more results than the database holds.
    R = min(Rs(j), n_db);
    ap = zeros(n_query, 1);
    recall = zeros(n_query, 1);
    for i = 1:n_query
        inxx = order(1:R, i);       % positions of the top-R database samples
        if multiLabel
            n_hit = nnz(Label(inxx, i) > 0);
            n_rel = nnz(Label(:, i) > 0);
        else
            n_hit = nnz(L_tr(inxx) == L_te(i));
            n_rel = nnz(L_tr == L_te(i));
        end
        if n_hit > 0
            ap(i) = n_hit / R;          % #relevant-in-result / #result
            recall(i) = n_hit / n_rel;  % #relevant-in-result / #relevant-in-all
        end
    end
    map(j) = mean(ap);
    recallA(j) = mean(recall);
end
References
1.
2.
3.