Faster R-CNN:一个目标被识别成多个 label(重复框问题)
在使用 Faster R-CNN 检测目标时,出现这种问题很正常。对于原作者的数据集来说,主要是识别一些物体,所以出现下面这种图的结果很正常。
但是当我们自己进行检测时,检测的有可能是一个物体的状态。由于模型拟合得不够好,同一个物体有可能会被检测成两种不同的状态,这时就需要在这两个结果中取得分最高的一个,然后保存。例如下图:
(示例图片)
该目标标注的 label 是 normal,但在识别的时候同时出现了 normal 和 tilt。这只能说明模型的拟合程度不够好,并不是哪里出错了。
解决方法其实很简单:
将所有的 box 都保存起来,然后做 NMS,NMS 之后再进行可视化操作(就是画框,上图相当于 NMS 后把 normal 去掉了)。
下面是我 demo.py 的代码。
在此之前,我已经在原作者代码的基础上做了修改,主要修改的方面有:(原文此处有两项内容缺失),还有一个是计数,方便数据统计。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# --------------------------------------------------------
# Tensorflow Faster R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Xinlei Chen, based on code from Ross Girshick
# --------------------------------------------------------
"""
Demo script showing detections in sample images.
See README.md for installation instructions before running.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
import xml.dom.minidom as xmldom

import cv2
import matplotlib
matplotlib.use('Agg')  # select the backend before pyplot is imported
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

import _init_paths  # NOTE(review): presumably extends sys.path for the project packages below
from model.config import cfg
from model.test import im_detect
from model.nms_wrapper import nms
from nets.resnet_v1 import resnetv1
from nets.vgg16 import vgg16
from utils.timer import Timer
# Detection classes; index 0 is the background class, which demo() skips.
CLASSES = ('__background__',
           'tilt', 'miss', 'normal')

# Checkpoint file name(s) for each supported backbone network.
NETS = {
    'vgg16': ('vgg16_faster_rcnn_iter_70000.ckpt',),
    'res101': ('res101_faster_rcnn_iter_100000.ckpt',),
}

# Image-set name(s) for each dataset key accepted on the command line.
DATASETS = {
    'pascal_voc': ('voc_2007_trainval',),
    'pascal_voc_0712': ('voc_2007_trainval+voc_2012_trainval',),
}
# 计算IOU交并⽐
def compute_IOU(rec1, rec2):
    """Compute the intersection-over-union (IoU) of two axis-aligned boxes.

    :param rec1: (x0, y0, x1, y1); (x0, y0) is the top-left corner and
        (x1, y1) the bottom-right corner.
    :param rec2: (x0, y0, x1, y1), same layout as ``rec1``.
    :return: IoU as a float; 0.0 when the boxes do not overlap (boxes that
        only touch along an edge or corner count as non-overlapping).
    """
    left_column_max = max(rec1[0], rec2[0])
    right_column_min = min(rec1[2], rec2[2])
    up_row_max = max(rec1[1], rec2[1])
    down_row_min = min(rec1[3], rec2[3])

    # No intersection: the overlap is empty along at least one axis.
    if left_column_max >= right_column_min or down_row_min <= up_row_max:
        return 0.0

    # The boxes intersect, so the union area below is strictly positive.
    S1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
    S2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
    S_cross = (down_row_min - up_row_max) * (right_column_min - left_column_max)
    # float() keeps true division even if the caller passes plain ints.
    return float(S_cross) / (S1 + S2 - S_cross)
# Module-level accumulators shared by vis_detections() and demo().
# They are mutated in place, so no `global` declaration is needed.

# Per-class number of detections that overlap a ground-truth box of the
# same class with IoU >= 0.5 (updated in vis_detections).
count = {
    "tilt": 0,
    "normal": 0,
    "miss": 0,
}

# Per-class counter; its only update in vis_detections is commented out.
count1 = {
    "tilt": 0,
    "normal": 0,
    "miss": 0,
}

# "sum": accumulated IoU of correctly classified detections;
# "time": accumulated detection time added by demo().
iou_sum = {
    "sum": 0.0,
    "time": 0.0,
}

# "cuowu" collects, newline-separated after the initial '000', the image
# names where a detection overlapped a ground-truth box of another class.
error = {
    "cuowu": '000',
}
# Default directory holding the VOC-style XML annotations used as ground truth.
_DEFAULT_ANNOTATION_DIR = '/home/share/liubo/tf-faster-rcnn1/data/VOCdevkit2007/VOC2007/Annotations/'


def _load_ground_truth(xml_path):
    """Parse an annotation XML into a list of (label, (xmin, ymin, xmax, ymax))."""
    root = xmldom.parse(xml_path).documentElement

    def _values(tag):
        return [node.firstChild.data for node in root.getElementsByTagName(tag)]

    labels = _values("name")
    xmins, ymins, xmaxs, ymaxs = (
        [int(value) for value in _values(tag)]
        for tag in ("xmin", "ymin", "xmax", "ymax")
    )
    return [(label, (x0, y0, x1, y1))
            for label, x0, y0, x1, y1 in zip(labels, xmins, ymins, xmaxs, ymaxs)]


def vis_detections(im, class_name, dets, image_name, ax, im_file, thresh=0.5,
                   annotation_dir=_DEFAULT_ANNOTATION_DIR):
    """Draw detected and ground-truth boxes and update the global statistics.

    :param im: image array (unused here; kept for interface compatibility).
    :param class_name: class label of every row in ``dets``.
    :param dets: array whose rows are (x0, y0, x1, y1, score).
    :param image_name: image file name; its stem selects the annotation XML.
    :param ax: matplotlib axes to draw on.
    :param im_file: image path (unused here; kept for interface compatibility).
    :param thresh: minimum score for a detection to be drawn and counted.
    :param annotation_dir: directory containing ``<stem>.xml`` annotations.
    :return: None. Updates the module-level ``count``, ``iou_sum`` and
        ``error`` dictionaries as a side effect.
    """
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return

    # Ground truth is parsed once instead of re-reading the DOM per comparison.
    stem = os.path.splitext(image_name)[0]
    ground_truth = _load_ground_truth(os.path.join(annotation_dir, stem + '.xml'))

    # Debug output for one specific image.
    # NOTE(review): the original indentation was lost; this assumes only the
    # print belonged to the `if` -- confirm against the author's file.
    if image_name == '300313.JPG' and ground_truth:
        print(ground_truth[0][0])

    # Ground-truth boxes in yellow, labelled at the top-right corner.
    for label, (x0, y0, x1, y1) in ground_truth:
        ax.add_patch(
            plt.Rectangle((x0, y0), x1 - x0, y1 - y0, fill=False,
                          edgecolor='yellow', linewidth=3.5)
        )
        ax.text(x1, y0 - 2,
                '{:s}'.format(label),
                bbox=dict(facecolor='white', alpha=0.5),
                fontsize=20, color='black')

    # Detections in red, labelled with class name and score.
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]
        print(class_name, score)
        ax.add_patch(
            plt.Rectangle((bbox[0], bbox[1]),
                          bbox[2] - bbox[0],
                          bbox[3] - bbox[1], fill=False,
                          edgecolor='red', linewidth=3.5)
        )
        ax.text(bbox[0], bbox[1] - 2,
                '{:s} {:.3f}'.format(class_name, score),
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14, color='white')

        # Compare the detection with every ground-truth box.
        r1 = (bbox[0], bbox[1], bbox[2], bbox[3])
        for label, r2 in ground_truth:
            iou = compute_IOU(r1, r2)
            if iou < 0.5:
                continue
            if label == class_name:
                # Correct class on a sufficiently overlapping box.
                count[class_name] += 1
                iou_sum['sum'] += iou
            else:
                # Overlapping box with a different label: record the image.
                error['cuowu'] = error['cuowu'] + '\n' + stem
def demo(ss, net, image_name):
    """Detect objects in one demo image and draw a single label per object.

    Per-class detections are filtered by NMS and score, then pooled and passed
    through a second, class-agnostic NMS so that an object reported under
    several classes keeps only one box.

    :param ss: TensorFlow session passed through to ``im_detect``.
    :param net: network object passed through to ``im_detect``.
    :param image_name: file name of the image under ``cfg.DATA_DIR/demo``.
    :raises IOError: if the image cannot be read.
    """
    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)
    if im is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise IOError('Could not read image {:s}'.format(im_file))

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(ss, net, im)
    timer.toc()
    iou_sum['time'] += timer.total_time
    print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time, boxes.shape[0]))

    CONF_THRESH = 0.8
    NMS_THRESH = 0.0

    # Reorder the channels (OpenCV loads BGR) before displaying the image.
    im = im[:, :, (2, 1, 0)]
    _, ax = plt.subplots(figsize=(40, 40))
    ax.imshow(im, aspect='equal')

    all_dets = np.empty((0, 5), np.float32)  # rows: x0, y0, x1, y1, score
    all_cls = np.empty((0, 6))               # same rows plus the class index
    for cls_ind, cls in enumerate(CLASSES[1:], start=1):  # skip background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        dets = dets[dets[:, -1] >= CONF_THRESH, :]
        if len(dets) == 0:
            continue
        all_dets = np.append(all_dets, dets, axis=0)
        # Tag every kept detection of this class with its class index.
        cls_column = np.full((dets.shape[0], 1), cls_ind)
        all_cls = np.vstack((all_cls, np.hstack((dets, cls_column))))

    # Class-agnostic NMS over the pooled detections removes duplicate boxes
    # that different classes placed on the same object.
    keep1 = nms(all_dets, NMS_THRESH)
    all_dets = all_dets[keep1, :]
    all_cls = all_cls.reshape(-1, 6)
    all_cls = all_cls[keep1, :]

    # Visualize each surviving detection under its own class label.
    for i in range(len(all_cls)):
        cls_index = int(all_cls[i][5])
        vis_detections(im, CLASSES[cls_index], all_dets[i].reshape(-1, 5), image_name, ax, im_file,
                       thresh=CONF_THRESH)
    plt.axis('off')
    plt.tight_layout()
    plt.draw()
def par_args():
    """Parse input arguments.

    :return: argparse namespace with ``demo_net`` (network name, a key of
        ``NETS``) and ``datat`` (dataset name, a key of ``DATASETS``).
    """
    parser = argparse.ArgumentParser(description='Tensorflow Faster R-CNN demo')
    parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16 res101]',
                        choices=sorted(NETS), default='res101')
    # '--datat' and dest='datat' are kept so existing command lines and the
    # `args.datat` lookup in the main block continue to work.
    parser.add_argument('--dataset', '--datat', dest='datat',
                        help='Trained dataset [pascal_voc pascal_voc_0712]',
                        choices=sorted(DATASETS), default='pascal_voc_0712')
    return parser.parse_args()
if __name__ == '__main__':
cfg.TEST.HAS_RPN = True # U RPN for proposals
args = par_args()
# model path
demonet = args.demo_net
datat = args.datat
tfmodel = os.path.join('output', demonet, 'voc_2007_trainval', 'default',
NETS[demonet][0])
if not os.path.isfile(tfmodel + '.meta'):
rai IOError(('{:s} not found.\nDid you download the proper networks from '
'our rver and place them properly?').format(tfmodel + '.meta'))
# t config
猪胆tfconfig = tf.ConfigProto(allow_soft_placement=True)
tfconfig.gpu_options.allow_growth = True
# init ssion
ss = tf.Session(config=tfconfig)
# load network
if demonet == 'vgg16':
net = vgg16()
elif demonet == 'res101':
石油英语net = resnetv1(num_layers=101)
el:
rai NotImplementedError火灾英语
tag='default', anchor_scales=[8, 16, 32])
saver = tf.train.Saver()
print('Loaded network {:s}'.format(tfmodel))
# im_names = ['000456.jpg', '000542.jpg', '001150.jpg',
# '001763.jpg', '004545.jpg']