Speeding up TensorFlow inference with TensorRT (Python and C++)
My environment setup can be found in my earlier blog posts.
With the preparation done, let's move on to the experiments.
Experiment 1: convert the TensorFlow pb model to a UFF model, then load the UFF model with TensorRT to predict an image
Using the pb model obtained earlier, convert it to a UFF model, build a TensorRT engine from it, and predict the cat image to see how long inference takes (as shown earlier, it took about 2.5 s without acceleration).
The pb-to-UFF conversion process is covered in my earlier blog post; a minimal sketch of the conversion call is also given below.
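For reference, the conversion boils down to a single call like this (a minimal sketch, assuming the legacy uff Python converter package that ships with TensorRT is installed; the file and node names follow the code further down):
# pb -> UFF conversion sketch (assumes TensorRT's legacy "uff" converter package).
import uff

uff.from_tensorflow_frozen_model(
    "weights.pb",                                   # frozen TensorFlow graph from the earlier post
    output_nodes=["resnet50/predictions/Softmax"],  # output node name of the frozen graph
    output_filename="weights.uff",                  # UFF file consumed by trt.UffParser below
)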
I modified that code so that it predicts a single image. The UFF model used for testing is the one obtained in the blog post above, and the image is the same cat picture used in the previous posts.
from random import randint

import tensorrt
from PIL import Image
import numpy as np
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
import pycuda.driver as cuda
# This import causes pycuda to automatically manage CUDA context creation and cleanup.
import pycuda.autoinit
from tensorflow.keras.preprocessing import image
import tensorrt as trt

import sys, os
sys.path.insert(1, os.path.join(sys.path[0], ".."))
import common
import cv2
import time

# You can set the logger severity higher to suppress messages (or lower to display more messages).
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# Frozen model layers:
# Input
# resnet50/conv1_pad/Pad/paddings
# resnet50/conv1_pad/Pad
# ...
# resnet50/predictions/BiasAdd
# resnet50/predictions/Softmax
# Identity
class ModelData(object):
    MODEL_FILE = "weights.uff"
    INPUT_NAME = "Input"
    INPUT_SHAPE = (224, 224, 3)
    OUTPUT_NAME = "resnet50/predictions/Softmax"


def build_engine():
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, builder.create_builder_config() as config, trt.UffParser() as parser:
        config.max_workspace_size = common.GiB(1)
        # Parse the Uff Network
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE, tensorrt.UffInputOrder.NHWC)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(ModelData.MODEL_FILE, network)
        engine = builder.build_engine(network, config)

        return engine


def main():
    engine = build_engine()
    # Build an engine, allocate buffers and create a stream.
    # For more information on buffer allocation, refer to the introductory samples.
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    with engine.create_execution_context() as context:
        # Load and preprocess the test image exactly as Keras ResNet50 expects.
        img = image.load_img('2008_002682.jpg', target_size=(224, 224))
        img = image.img_to_array(img)
        img = preprocess_input(img)
        print(img.shape)
        img = img[np.newaxis, :]
        inputs[0].host = img.ravel()
        print(inputs[0].host.shape)

        t_model = time.perf_counter()
        result = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
        print(f'do inference cost:{time.perf_counter() - t_model:.8f}s')

        output = np.array(result[1])
        output = output[np.newaxis, :]
        print(output.shape)
        print('Predicted:', decode_predictions(output, top=5)[0])


if __name__ == '__main__':
    main()
The prediction result is as follows:
Compared with predicting directly from the pb model (which takes about 2 s), the prediction result is identical, but the inference time drops to roughly 0.004 s here, a huge improvement.
The key statements are the NHWC input registration and the timed do_inference call, excerpted below:
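These lines are taken from the script above (not a standalone snippet; parser, ModelData, inputs and context are defined there):
# Excerpt from the script above: the UFF graph keeps TensorFlow's NHWC layout,
# so the input is registered as NHWC, and the preprocessed image is flattened
# into the pinned host buffer before the timed inference call.
parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE, tensorrt.UffInputOrder.NHWC)

inputs[0].host = img.ravel()
result = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)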
Experiment 2: convert the TensorFlow pb model to an ONNX model, then load the ONNX model with TensorRT to predict the image
Still using the pb model generated in the post mentioned earlier for this experiment.
Install the following package:
pip install -U tf2onnx
Then the conversion can be done conveniently with one command (the name of the converted ONNX file is your choice; I use weights.onnx here to match weights.pb):
python -m tf2onnx.convert --graphdef weights.pb --output weights.onnx --inputs Input:0 --outputs resnet50/predictions/Softmax:0
The input and output node names are required (the earlier post printed every layer's name, so I already had them; a small sketch for listing them is shown below).
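If you need to recover the node names yourself, a minimal sketch such as the following prints them (assuming TensorFlow 2.x and the frozen graph weights.pb):
# Print every node name in the frozen graph so the --inputs/--outputs
# arguments for tf2onnx can be filled in.
import tensorflow as tf

with tf.io.gfile.GFile("weights.pb", "rb") as f:
    graph_def = tf.compat.v1.GraphDef()
    graph_def.ParseFromString(f.read())

for node in graph_def.node:
    print(node.name)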
Next, use TensorRT to load this ONNX model and convert it to an engine before predicting the image; you can refer to my earlier blog post for this step. With trtexec the conversion looks like this (a Python sketch of the equivalent build step follows the command):
./trtexec --onnx=/home/sxhlvye/Trial1/ --saveEngine=/home/sxhlvye/Trial1/
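The same ONNX-to-engine step can also be done from Python instead of trtexec. Below is a minimal sketch, assuming the TensorRT 7.x/8.0 Python API (where build_engine and max_workspace_size still exist) and placeholder file names weights.onnx / weights.trt:
# Build and serialize a TensorRT engine from an ONNX file (sketch; file names are placeholders).
import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
EXPLICIT_BATCH = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

def build_engine_from_onnx(onnx_path, engine_path):
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network(EXPLICIT_BATCH) as network, \
         builder.create_builder_config() as config, \
         trt.OnnxParser(network, TRT_LOGGER) as parser:
        config.max_workspace_size = 1 << 30  # 1 GiB
        with open(onnx_path, "rb") as f:
            if not parser.parse(f.read()):
                for i in range(parser.num_errors):
                    print(parser.get_error(i))
                return None
        engine = builder.build_engine(network, config)
        # Serialize the engine so it can be loaded later, like the trtexec --saveEngine output.
        with open(engine_path, "wb") as f:
            f.write(engine.serialize())
        return engine

# build_engine_from_onnx("weights.onnx", "weights.trt")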
Once that is done, run the following code (unlike the earlier blog post, I made a few small changes here):
import sys
import cv2
from PIL import Image
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import resnet50
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
import tensorflow as tf
import time
import numpy as np
# This import causes pycuda to automatically manage CUDA context creation and cleanup.
import pycuda.autoinit
import tensorrt as trt
import common
import pycuda.driver as cuda
import matplotlib.pyplot as plt

# You can set the logger severity higher to suppress messages (or lower to display more messages).
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

filename = "2008_002682.jpg"
engine_file_path = ""  # path to the engine serialized by trtexec above


class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        """Within this context, host_mem means the CPU memory and device_mem means the GPU memory."""
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()


def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))

    return inputs, outputs, bindings, stream


def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer data from CPU to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]

    # Run inference.
    t_model = time.perf_counter()
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    print(f'only one line cost:{time.perf_counter() - t_model:.8f}s')

    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]

    # Synchronize the stream
    stream.synchronize()

    # Return only the host outputs.
    return [out.host for out in outputs]


def main():
    print("Reading engine from file {}".format(engine_file_path))
    with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())

    # create the context for this engine
    context = engine.create_execution_context()
    # allocate buffers for input and output (host/device pairs plus the binding pointers)
    inputs, outputs, bindings, stream = allocate_buffers(engine)
    # read an image
    img = image.load_img('2008_002682.jpg', target_size=(224, 224))
    img = image.img_to_array(img)
    img = preprocess_input(img)
    print(img.shape)
    img = img[np.newaxis, :]

    # Load data to the buffer
    inputs[0].host = img.ravel()
    print(inputs[0].host.shape)

    # Do inference
    t_model = time.perf_counter()
    result = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
    print(f'do inference cost:{time.perf_counter() - t_model:.8f}s')

    output = np.array(result[0])
    output = output[np.newaxis, :]
    print(output.shape)
    print('Predicted:', decode_predictions(output, top=5)[0])


if __name__ == '__main__':
    main()
The execution output is as follows:
/home/sxhlvye/anaconda3/bin/python /home/sxhlvye/Trial1/Tensorrt/test_onnx1.py
2022-03-20 18:13:45.682920: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
Reading engine from file
[TensorRT] WARNING: TensorRT was linked against cuBLAS/cuBLAS LT 11.2.0 but loaded cuBLAS/cuBLAS LT 11.1.0
[TensorRT] WARNING: TensorRT was linked against cuDNN 8.2.0 but loaded cuDNN 8.0.5
[TensorRT] WARNING: TensorRT was linked against cuBLAS/cuBLAS LT 11.2.0 but loaded cuBLAS/cuBLAS LT 11.1.0
[TensorRT] WARNING: TensorRT was linked against cuDNN 8.2.0 but loaded cuDNN 8.0.5
(224, 224, 3)
(150528,)
only one line cost:0.33635211s
do inference cost:0.33758112s
(1, 1000)
Predicted: [('n02123597', 'Siamese_cat', 0.16550788), ('n02108915', 'French_bulldog', 0.14138032), ('n04409515', 'tennis_ball', 0.08570899), ('n02095314'

Process finished with exit code 0
Compare this with the result of predicting the image directly from the pb model:
The results are identical, and with TensorRT acceleration the inference time comes down to about 0.3376 s.
Experiment 3: after converting the TensorFlow pb model to an ONNX model (the conversion was described above), deploy it with C++
For this you can refer to my earlier blog post, which demonstrates exactly how to deploy an ONNX model once you have it, so I won't repeat it here.
Experiment 4: after converting the TensorFlow pb model to a UFF model (the conversion was described above), deploy it with C++
This is much the same as the blog post above; here I referred to the sample that ships with TensorRT.
I only made small modifications to get the whole pipeline running, and the image normalization is kept very simple. The full code is below (see the blog post mentioned above for the C++ environment setup).
1#include "BatchStream.h"
2#include "EntropyCalibrator.h"
3#include "argsParr.h"