kinetics-skeleton格式行为数据提取方法

更新时间:2023-07-04 17:07:12 阅读: 评论:0

kinetics-skeleton格式行为数据提取方法
kinetics-skeleton格式行为数据提取方法
利用Lightweight-OpenPose生成kinetics-skeleton格式数据
这里记录利用Lightweight-OpenPose进行人体姿态估计,并自动提取kinetics-skeleton格式行为数据的方法。生成的数据可用于st-gcn模型的训练。
1、配置Lightweight-OpenPose运行环境
获取lightweight-human-pose-estimation项目代码,配置依赖
git clone https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch.git
cd lightweight-human-pose-estimation.pytorch
pip install -r requirements.txt
其中pytorch和一些装不上的库可以单独安装
2、下载人体姿态估计模型
项目中提供了训练好的人体姿态估计模型,可直接下载。
下载后可以用demo测试一下
python demo.py --checkpoint-path <path_to>/checkpoint_iter_370000.pth --video 0
3、kinetics-skeleton数据生成
kinetics-skeleton数据的具体格式可以下载样例查看
提取码: yiww
这里需要修改demo.py,将预测的姿态逐帧写入json文件中。
这里直接把整个python代码贴在下面
import argparse
import json
import os
import time

import cv2
import numpy as np
import torch

from models.with_mobilenet import PoseEstimationWithMobileNet
from modules.keypoints import extract_keypoints, group_keypoints
from modules.load_state import load_state
from modules.pose import Pose, track_poses
from val import normalize, pad_width
class VideoReader(object):
    """Iterate over the frames of a video file or a webcam stream.

    Args:
        file_name: Path to a video file, or a string holding an integer
            camera index (e.g. ``"0"``).
    """

    def __init__(self, file_name):
        self.file_name = file_name
        try:  # OpenCV needs int to read from webcam
            self.file_name = int(file_name)
        except ValueError:
            pass

    def __iter__(self):
        """Open the capture and return the reader itself as the iterator.

        Raises:
            IOError: If OpenCV cannot open the source.
        """
        self.cap = cv2.VideoCapture(self.file_name)
        if not self.cap.isOpened():
            raise IOError('Video {} cannot be opened'.format(self.file_name))
        return self

    def __next__(self):
        """Return the next frame; stop iteration when no frame can be read."""
        was_read, img = self.cap.read()
        if not was_read:
            # Free the underlying file/device handle once the stream is exhausted.
            self.cap.release()
            raise StopIteration
        return img
def infer_fast(net, img, net_input_height_size, stride, upsample_ratio, cpu,
               pad_value=(0, 0, 0), img_mean=np.array([128, 128, 128], np.float32), img_scale=np.float32(1/256)):
    """Run the pose network on one frame and return upsampled heatmaps and PAFs.

    The frame is rescaled so its height equals ``net_input_height_size``,
    normalized with ``img_mean`` / ``img_scale``, padded via ``pad_width`` and
    fed through ``net``. The last two network outputs are used as heatmaps
    (``[-2]``) and part affinity fields (``[-1]``).

    Args:
        net: Callable network returning a sequence of stage outputs.
        img: Input frame; indexed as ``(height, width, channels)``.
        net_input_height_size: Target height of the rescaled frame.
        stride: Stride passed to ``pad_width``.
        upsample_ratio: Factor by which heatmaps and PAFs are upsampled.
        cpu: When false, the input tensor is moved to CUDA.
        pad_value: Padding value forwarded to ``pad_width``.
        img_mean: Mean forwarded to ``normalize``.
        img_scale: Scale forwarded to ``normalize``.

    Returns:
        Tuple ``(heatmaps, pafs, scale, pad)`` where ``scale`` is the resize
        factor applied to the frame and ``pad`` is what ``pad_width`` returned.
    """
    height, width, _ = img.shape
    scale = net_input_height_size / height

    scaled_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
    scaled_img = normalize(scaled_img, img_mean, img_scale)
    min_dims = [net_input_height_size, max(scaled_img.shape[1], net_input_height_size)]
    padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims)

    tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float()
    if not cpu:
        tensor_img = tensor_img.cuda()

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        stages_output = net(tensor_img)

    stage2_heatmaps = stages_output[-2]
    heatmaps = np.transpose(stage2_heatmaps.squeeze().cpu().data.numpy(), (1, 2, 0))
    heatmaps = cv2.resize(heatmaps, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC)

    stage2_pafs = stages_output[-1]
    pafs = np.transpose(stage2_pafs.squeeze().cpu().data.numpy(), (1, 2, 0))
    pafs = cv2.resize(pafs, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC)

    return heatmaps, pafs, scale, pad
def _empty_skeleton_entry(num_keypoints):
    """Return an all-zero skeleton entry with the same lengths as a real one."""
    return {"pose": [0.0] * (2 * num_keypoints), "score": [0.0] * num_keypoints}


def _pose_to_skeleton_entry(pose, frame_width, frame_height, num_keypoints):
    """Convert one pose into a ``{"pose": [...], "score": [...]}`` entry.

    Coordinates are divided by the frame size and rounded to three decimals;
    keypoints marked ``-1`` (not found) are written as zeros with score 0.
    """
    coords = []
    scores = []
    for kpt_id in range(num_keypoints):
        x = pose.keypoints[kpt_id][0]
        y = pose.keypoints[kpt_id][1]
        if x != -1 and y != -1:
            coords.append(float(format(x / frame_width, '.3f')))
            coords.append(float(format(y / frame_height, '.3f')))
            scores.append(1.0)
        else:
            coords.append(0.0)
            coords.append(0.0)
            scores.append(0.0)
    return {"pose": coords, "score": scores}


def run_demo(net, image_provider, height_size, cpu, track, smooth, video_info,
             label_index, label_name, annotation_name, output_dir):
    """Estimate poses for every frame of one video and dump them as JSON.

    For each frame the detected poses are drawn in a preview window and
    stored as up to ``track_num`` (5) skeleton slots. After the last frame a
    per-video JSON file is written into ``output_dir`` and the accumulated
    ``video_info`` annotation is rewritten to ``annotation_name``.

    Args:
        net: Pose estimation network; put into eval mode (and on CUDA unless
            ``cpu`` is true).
        image_provider: Iterable of frames. Its ``file_name`` attribute, when
            present, names the output file; otherwise ``label_name`` is used.
        height_size: Network input height forwarded to ``infer_fast``.
        cpu: Run on CPU when true.
        track: When truthy, poses are tracked across frames with
            ``track_poses`` and the pose id selects the skeleton slot.
        smooth: Forwarded to ``track_poses``.
        video_info: Dict of per-video annotations; updated in place.
        label_index: Integer class index stored with the video.
        label_name: Class name stored with the video.
        annotation_name: Path of the annotation JSON file.
        output_dir: Directory for the per-video skeleton JSON.

    Returns:
        The updated ``video_info`` dict.
    """
    net = net.eval()
    if not cpu:
        net = net.cuda()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    track_num = 5
    window_title = 'Lightweight Human Pose Estimation Python Demo'

    # Derive the video name from the provider rather than from a global
    # variable of the calling script.
    source = getattr(image_provider, 'file_name', label_name)
    video_name = os.path.basename(str(source)).split('.')[0]

    previous_poses = []
    data_action = []
    # Must be initialised before the loop so it is defined for empty videos
    # and stays True once any frame contained a skeleton.
    have_skeleton = False
    Pose.last_id = -1

    for frame_index, img in enumerate(image_provider, start=1):
        frame_skeleton = {"frame_index": frame_index}
        time_start = time.time()
        heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs)
        # Map keypoints from network-output coordinates back to the frame.
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale

        current_poses = []
        for entry in pose_entries:
            # NOTE(review): this tests the length of the entry vector, not the
            # number of detected keypoints - confirm which was intended.
            if len(entry) < 6:
                continue
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(num_keypoints):
                if entry[kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(all_keypoints[int(entry[kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(all_keypoints[int(entry[kpt_id]), 1])
            current_poses.append(Pose(pose_keypoints, entry[19]))
        time_end = time.time()
        print('time cost', time_end - time_start, 's')

        if track:
            track_poses(previous_poses, current_poses, smooth=smooth)
            previous_poses = current_poses

        for pose in current_poses:
            pose.draw(img)
        for pose in current_poses:
            cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
                          (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
            if track:
                cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
        cv2.namedWindow(window_title, 0)
        cv2.imshow(window_title, img)
        cv2.waitKey(1)

        if current_poses:
            frame_height, frame_width = img.shape[0], img.shape[1]
            # Independent zero entries sized like real ones (2 coords + 1 score
            # per keypoint) so every slot has a consistent length.
            data_skeleton = [_empty_skeleton_entry(num_keypoints) for _ in range(track_num)]
            for order, pose in enumerate(current_poses):
                # Presumably ids stay None when tracking is off; fall back to
                # detection order so skeletons are still exported.
                slot = getattr(pose, 'id', None)
                if slot is None:
                    slot = order
                if 0 <= slot < track_num:
                    # A fresh dict per pose: slots must not alias each other.
                    data_skeleton[slot] = _pose_to_skeleton_entry(pose, frame_width, frame_height,
                                                                  num_keypoints)
            frame_skeleton["skeleton"] = data_skeleton
            have_skeleton = True
        else:
            frame_skeleton["skeleton"] = []

        # generate skeleton for stgcn
        data_action.append(frame_skeleton)

    label = str(label_name)
    data_json = {"data": data_action, "label": label, "label_index": label_index}
    video_info[video_name] = {
        "has_skeleton": have_skeleton,
        "label": label,
        "label_index": label_index
    }

    # makedirs handles nested defaults such as 'output/skeletons/'.
    os.makedirs(output_dir, exist_ok=True)
    with open(os.path.join(output_dir, video_name + ".json"), 'w') as f:
        json.dump(data_json, f)

    annotation_dir = os.path.dirname(annotation_name)
    if annotation_dir:
        os.makedirs(annotation_dir, exist_ok=True)
    # ensure_ascii=False may emit non-ASCII text, so fix the file encoding.
    with open(annotation_name, 'w', encoding='utf-8') as f1:
        f1.write(json.dumps(video_info, ensure_ascii=False, indent=1))
    return video_info
if __name__ == '__main__':
    # Batch driver: run pose estimation on every video in --videos and write
    # kinetics-skeleton style JSON files plus one annotation file.
    parser = argparse.ArgumentParser(
        description='''Lightweight human pose estimation python demo.
                       This is just for quick results preview.
                       Please, consider c++ demo for the best performance.''')
    parser.add_argument('--checkpoint-path', type=str, default='./checkpoint_iter_370000.pth',
                        help='path to the checkpoint')
    parser.add_argument('--height-size', type=int, default=256, help='network input layer height size')
    # Directory holding the videos to process.
    parser.add_argument('--videos', type=str, default='videos/', help='path to video dir')
    parser.add_argument('--output_dir', type=str, default='output/skeletons/', help='path to output result')
    parser.add_argument('--annotation_name', type=str, default='output/kinetics_train_label.json',
                        help='Path to save output as json file. If nothing is given, the output will not be saved.')
    # Switch to CPU when no GPU is available.
    parser.add_argument('--cpu', action='store_true', help='run network inference on cpu')
    parser.add_argument('--track', type=int, default=1, help='track pose id in video')
    parser.add_argument('--smooth', type=int, default=1, help='smooth pose keypoints')
    args = parser.parse_args()

    if args.videos == '':
        raise ValueError('Either --videos has to be provided')

    net = PoseEstimationWithMobileNet()
    checkpoint = torch.load(args.checkpoint_path, map_location='cpu')
    load_state(net, checkpoint)

    # The class is taken from the file name prefix, e.g. "dancing_00001.mp4".
    label_list = ['dancing', 'standing', 'walking', 'laying']
    video_info = {}
    for file in sorted(os.listdir(args.videos)):
        video_path = os.path.join(args.videos, file)
        # Test the joined path: a bare name would be resolved against the CWD.
        if os.path.isdir(video_path):
            continue
        label_name = file.split('_')[0]
        if label_name not in label_list:
            # Skip stray files instead of aborting the whole batch.
            print('skip {}: label {!r} is not one of {}'.format(file, label_name, label_list))
            continue
        label_index = label_list.index(label_name)
        frame_provider = VideoReader(video_path)
        video_info = run_demo(net, frame_provider, args.height_size, args.cpu, args.track,
                              args.smooth, video_info, label_index, label_name,
                              args.annotation_name, args.output_dir)
注意如下代码,这里需要根据视频的命名来确定类别。例如dancing类别的视频样本,可以命名为“dancing_00001.mp4”。
# Determine label_index from the file name: the text before the first "_" is the class name.
label_name =file.split('_')[0]
label_list =['dancing','standing','walking','laying']
# list.index raises ValueError when label_name is not one of the classes above.
label_index = label_list.index(label_name)
运行过程中,窗口会显示姿态估计效果,运行后生成的内容在output文件夹中。

本文发布于:2023-07-04 17:07:12,感谢您对本站的认可!

本文链接:https://www.wtabcd.cn/fanwen/fan/89/1067692.html

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。

标签:视频   姿态   需要   格式   估计   类别   代码   模型
相关文章
留言与评论(共有 0 条评论)
   
验证码:
推荐文章
排行榜
Copyright ©2019-2022 Comsenz Inc.Powered by © 专利检索| 网站地图