首页 > 美文鉴赏

kinetics-skeleton格式行为数据提取方法

更新时间:2023-07-04 17:07:12 阅读：评论：0

kinetics-skeleton格式⾏为数据提取⽅法

kinetics-skeleton格式行为数据提取方法

利用Lightweight-OpenPose生成kinetics-skeleton格式数据

这里记录利用Lightweight-OpenPose进行人体姿态估计，并自动提取kinetics-skeleton格式的行为数据的方法。生成的数据可用于st-gcn模型的训练。

1、配置Lightweight-OpenPose运行环境

获取lightweight-human-pose-estimation项目代码，配置依赖

git clone https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch.git

cd lightweight-human-pose-estimation.pytorch

pip install -r requirements.txt

其中pytorch和⼀些装不上的库可以单独安装

2、下载⼈体姿态估计模型

项⽬中提供了训练好的⼈体姿态估计模型，可直接下载。

下载后可以用demo测试一下

python demo.py --checkpoint-path <path_to>/checkpoint_iter_370000.pth --video 0

3、kinetics-skeleton数据⽣成

kinetics-skeleton数据的具体格式可以下载样例查看

提取码： yiww

这里需要修改demo.py，将预测的姿态逐帧写入json文件中。

这⾥直接把整个python代码贴下⾯

import argparse
import json
import os
import time

import cv2
import numpy as np
import torch

from models.with_mobilenet import PoseEstimationWithMobileNet
from modules.keypoints import extract_keypoints, group_keypoints
from modules.load_state import load_state
from modules.pose import Pose, track_poses
from val import normalize, pad_width

class VideoReader(object):
    """Iterate over the frames of a video file or webcam via OpenCV.

    The capture device is opened lazily in ``__iter__`` so that constructing
    a reader never touches the file system or a camera.
    """

    def __init__(self, file_name):
        """Remember the video source.

        Args:
            file_name: Path to a video file, or a string/int webcam index.
                A value that parses as an integer is stored as ``int``
                because OpenCV needs an int to read from a webcam.
        """
        self.file_name = file_name
        try:  # OpenCV needs int to read from webcam
            self.file_name = int(file_name)
        except ValueError:
            pass

    def __iter__(self):
        """Open the capture and return the reader itself as the iterator.

        Raises:
            IOError: If OpenCV cannot open the source.
        """
        self.cap = cv2.VideoCapture(self.file_name)
        if not self.cap.isOpened():
            raise IOError('Video {} cannot be opened'.format(self.file_name))
        return self

    def __next__(self):
        """Return the next frame, or stop once no more frames can be read."""
        was_read, img = self.cap.read()
        if not was_read:
            # Free the underlying handle as soon as the stream is exhausted
            # instead of leaving it to garbage collection.
            self.cap.release()
            raise StopIteration
        return img

def infer_fast(net, img, net_input_height_size, stride, upsample_ratio, cpu,
               pad_value=(0, 0, 0), img_mean=np.array([128, 128, 128], np.float32), img_scale=np.float32(1/256)):
    """Run the pose network on one image and return upsampled heatmaps and PAFs.

    Args:
        net: Callable network; its output is indexed as a sequence whose last
            two items are used as heatmaps (``[-2]``) and PAFs (``[-1]``).
        img: Image array with three axes (height, width, channels).
        net_input_height_size: Height the image is rescaled to before inference.
        stride: Passed to ``pad_width`` when padding the scaled image.
        upsample_ratio: Factor by which the network outputs are enlarged.
        cpu: When falsy the input tensor is moved to CUDA.
        pad_value: Passed to ``pad_width``.
        img_mean: Passed to ``normalize``.
        img_scale: Passed to ``normalize``.

    Returns:
        Tuple ``(heatmaps, pafs, scale, pad)``: the two resized output arrays
        (channels moved to the last axis), the scale factor applied to the
        input image, and the padding returned by ``pad_width``.
    """
    height, width, _ = img.shape
    scale = net_input_height_size / height

    scaled_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
    scaled_img = normalize(scaled_img, img_mean, img_scale)
    min_dims = [net_input_height_size, max(scaled_img.shape[1], net_input_height_size)]
    padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims)

    # HWC image -> NCHW float tensor with a batch axis of size one.
    tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float()
    if not cpu:
        tensor_img = tensor_img.cuda()

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        stages_output = net(tensor_img)

    stage2_heatmaps = stages_output[-2]
    heatmaps = np.transpose(stage2_heatmaps.squeeze().cpu().data.numpy(), (1, 2, 0))
    heatmaps = cv2.resize(heatmaps, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC)

    stage2_pafs = stages_output[-1]
    pafs = np.transpose(stage2_pafs.squeeze().cpu().data.numpy(), (1, 2, 0))
    pafs = cv2.resize(pafs, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC)

    return heatmaps, pafs, scale, pad

def _empty_skeleton(num_keypoints):
    """Return a fresh all-zero person entry (an x/y pair and a score per keypoint)."""
    return {"pose": [0.0] * (2 * num_keypoints), "score": [0.0] * num_keypoints}


def _pose_to_skeleton(pose, img_width, img_height, num_keypoints):
    """Convert one pose into a person entry with coordinates normalised to [0, 1]."""
    coords = []
    scores = []
    for kpt_id in range(num_keypoints):
        x = pose.keypoints[kpt_id][0]
        y = pose.keypoints[kpt_id][1]
        if x != -1 and y != -1:
            coords.append(round(float(x) / img_width, 3))
            coords.append(round(float(y) / img_height, 3))
            scores.append(1.0)
        else:
            # A missing keypoint still occupies an (x, y) pair so that the
            # flat coordinate list stays aligned with the score list.
            coords.extend((0.0, 0.0))
            scores.append(0.0)
    return {"pose": coords, "score": scores}


def run_demo(net, image_provider, height_size, cpu, track, smooth, video_info, label_index, label_name, annotation_name, output_dir):
    """Estimate poses for every frame of one video and dump them as JSON.

    For each frame the poses are drawn in a preview window and recorded as
    per-person keypoint lists. After the last frame a ``<video name>.json``
    file is written into ``output_dir`` and the cumulative annotation file
    ``annotation_name`` is rewritten.

    Args:
        net: Pose estimation network; switched to eval mode here.
        image_provider: Iterable of frames. Its ``file_name`` attribute, when
            present, names the output file; otherwise ``label_name`` is used.
        height_size: Network input height forwarded to ``infer_fast``.
        cpu: When falsy the network is moved to CUDA.
        track: When truthy poses are tracked across frames and ids are drawn.
        smooth: Forwarded to ``track_poses``.
        video_info: Dict of per-video annotations; updated in place.
        label_index: Class index stored with the video.
        label_name: Class name stored with the video.
        annotation_name: Path of the annotation JSON file to (re)write.
        output_dir: Directory receiving the per-video skeleton JSON.

    Returns:
        The updated ``video_info`` dict.
    """
    net = net.eval()
    if not cpu:
        net = net.cuda()

    stride = 8
    upsample_ratio = 4
    num_keypoints = Pose.num_kpts
    track_num = 5  # number of person slots written per frame
    window_name = 'Lightweight Human Pose Estimation Python Demo'

    previous_poses = []
    Pose.last_id = -1  # restart id numbering for every video
    frames = []
    have_skeleton = False  # becomes True once any frame contains a pose

    for frame_index, img in enumerate(image_provider, start=1):
        frame_skeleton = {"frame_index": frame_index}
        time_start = time.time()

        heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride, upsample_ratio, cpu)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs)
        # Map keypoints from network-output coordinates back to the original image.
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale

        current_poses = []
        for entry in pose_entries:
            if len(entry) < 6:
                continue
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(num_keypoints):
                if entry[kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(all_keypoints[int(entry[kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(all_keypoints[int(entry[kpt_id]), 1])
            # NOTE(review): index 19 is kept from the original listing; confirm
            # against group_keypoints which column holds the pose confidence.
            current_poses.append(Pose(pose_keypoints, entry[19]))

        time_end = time.time()
        print('time cost', time_end - time_start, 's')

        if track:
            track_poses(previous_poses, current_poses, smooth=smooth)
            previous_poses = current_poses

        for pose in current_poses:
            pose.draw(img)
        for pose in current_poses:
            cv2.rectangle(img, (pose.bbox[0], pose.bbox[1]),
                          (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]), (0, 255, 0))
            if track:
                cv2.putText(img, 'id: {}'.format(pose.id), (pose.bbox[0], pose.bbox[1] - 16),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255))
        cv2.namedWindow(window_name, 0)
        cv2.imshow(window_name, img)
        cv2.waitKey(1)

        if current_poses:
            img_height, img_width = img.shape[0], img.shape[1]
            # One independent placeholder per slot; slots are overwritten by id.
            people = [_empty_skeleton(num_keypoints) for _ in range(track_num)]
            for position, pose in enumerate(current_poses):
                # presumably pose.id stays None when tracking is disabled —
                # fall back to the detection order in that case (TODO confirm).
                slot = pose.id if pose.id is not None else position
                if 0 <= slot < track_num:
                    people[slot] = _pose_to_skeleton(pose, img_width, img_height, num_keypoints)
            frame_skeleton["skeleton"] = people
            have_skeleton = True
        else:
            frame_skeleton["skeleton"] = []

        # generate skeleton for stgcn
        frames.append(frame_skeleton)

    # Name the outputs after the video itself rather than a caller's global.
    source = getattr(image_provider, 'file_name', label_name)
    video_name = os.path.splitext(os.path.basename(str(source)))[0]
    label = str(label_name)

    data_json = {"data": frames, "label": label, "label_index": label_index}
    video_info[video_name] = {
        "has_skeleton": have_skeleton,
        "label": label,
        "label_index": label_index
    }

    # makedirs copes with nested paths such as 'output/skeletons/'.
    os.makedirs(output_dir, exist_ok=True)
    with open(os.path.join(output_dir, video_name + ".json"), 'w') as f:
        json.dump(data_json, f)

    annotation_dir = os.path.dirname(annotation_name)
    if annotation_dir:
        os.makedirs(annotation_dir, exist_ok=True)
    with open(annotation_name, 'w', encoding='utf-8') as f1:
        f1.write(json.dumps(video_info, ensure_ascii=False, indent=1))

    return video_info

if __name__ == '__main__':
    # Batch entry point: run pose estimation on every video in a directory
    # and export one skeleton JSON per video plus a shared annotation file.
    parser = argparse.ArgumentParser(
        description='''Lightweight human pose estimation python demo.
                       This is just for quick results preview.
                       Please, consider c++ demo for the best performance.''')
    parser.add_argument('--checkpoint-path', type=str, default='./checkpoint_iter_370000.pth',
                        help='path to the checkpoint')
    parser.add_argument('--height-size', type=int, default=256, help='network input layer height size')
    # Directory holding the videos to process.
    parser.add_argument('--videos', type=str, default='videos/', help='path to video dir')
    parser.add_argument('--output_dir', type=str, default='output/skeletons/', help='path to output result')
    parser.add_argument('--annotation_name', type=str, default='output/kinetics_train_label.json',
                        help='path of the json annotation file listing every processed video')
    # Pass --cpu when no GPU is available.
    parser.add_argument('--cpu', action='store_true', help='run network inference on cpu')
    parser.add_argument('--track', type=int, default=1, help='track pose id in video')
    parser.add_argument('--smooth', type=int, default=1, help='smooth pose keypoints')
    args = parser.parse_args()

    if args.videos == '':
        raise ValueError('Either --videos has to be provided')

    net = PoseEstimationWithMobileNet()
    checkpoint = torch.load(args.checkpoint_path, map_location='cpu')
    load_state(net, checkpoint)

    # The class of a video is the part of its file name before the first
    # underscore, e.g. "dancing_00001.mp4" -> "dancing".
    label_list = ['dancing', 'standing', 'walking', 'laying']

    files = sorted(os.listdir(args.videos))  # sorted for a reproducible order
    video_info = {}
    for file in files:
        video_path = os.path.join(args.videos, file)
        # Test the joined path: a bare name would be resolved against the CWD.
        if os.path.isdir(video_path):
            continue

        label_name = file.split('_')[0]
        if label_name not in label_list:
            # A stray file must not abort the whole batch.
            print('skip {}: label {!r} is not one of {}'.format(file, label_name, label_list))
            continue
        label_index = label_list.index(label_name)

        frame_provider = VideoReader(video_path)
        video_info = run_demo(net, frame_provider, args.height_size, args.cpu, args.track,
                              args.smooth, video_info, label_index, label_name,
                              args.annotation_name, args.output_dir)

注意如下代码，这⾥需要根据视频的命名来确定类别。例如dancing类别的视频样本，可以命名为“dancing_00001.mp4”

# Derive label_index from the file name: the class is the text before the first underscore

label_name =file.split('_')[0]

label_list =['dancing','standing','walking','laying']

label_index = label_list.index(label_name)

运⾏过程中，窗⼝会显⽰姿态估计效果，运⾏后⽣成内容在output⽂件夹中。

本文发布于:2023-07-04 17:07:12，感谢您对本站的认可！

本文链接：https://www.wtabcd.cn/fanwen/fan/89/1067692.html

上一篇：基于移动端轻量级深度学习图像去雾算法的研究与实现

下一篇：ldap工作原理

标签：视频姿态需要格式估计类别代码模型

留言与评论（共有 0 条评论）