Summary of PyTorch Quantization Methods
Quantization Methods
1. Post Training Dynamic Quantization: the simplest quantization method. "Post Training" means quantization happens after the floating-point model has finished training and converged. The weights are quantized ahead of time, while the activations are quantized dynamically during inference: on every forward pass, each layer computes a fresh scale and zero_point from the actual range of its floating-point data and then quantizes;
2. Post Training Static Quantization: the first method is not very common. What people usually mean by "Post Training Quantization" is this static variant, and it is the most widely used one. The weights are quantized ahead of time just as above, and the activations are quantized using the fixed scale and zero_point recorded during a prior calibration step, so no quantization parameters (scale and zero_point) are recomputed at any point;
3. Quantization Aware Training: when the train-then-quantize flow loses too much accuracy, quantization aware training is needed. The quantization process is simulated during training: the values are still represented as float32, but the spacing between the actual representable values is constrained by the quantization parameters. A minimal API sketch of all three methods follows this list.
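To make the three methods concrete, here is a minimal eager-mode sketch of each API path. This is a sketch, not code from the original post: MyModel and the fake calibration data are placeholders invented for illustration, and 'fbgemm' in the qconfig calls is an assumption (swap in 'qnnpack' to match your build).

import torch
import torch.nn as nn
from torch.quantization import QuantStub, DeQuantStub


class MyModel(nn.Module):
    # Tiny placeholder model, just to keep the sketch self-contained.
    def __init__(self):
        super().__init__()
        self.quant = QuantStub()
        self.fc = nn.Linear(16, 4)
        self.dequant = DeQuantStub()

    def forward(self, x):
        return self.dequant(self.fc(self.quant(x)))


# 1. Post Training Dynamic Quantization: weights quantized ahead of time,
#    activation scale/zero_point recomputed on every forward pass.
dynamic_model = torch.quantization.quantize_dynamic(
    MyModel().eval(), {nn.Linear}, dtype=torch.qint8)

# 2. Post Training Static Quantization: one calibration pass records the
#    activation ranges; scale/zero_point are then frozen at convert time.
static_model = MyModel().eval()
static_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
torch.quantization.prepare(static_model, inplace=True)
for _ in range(4):                      # fake calibration data
    static_model(torch.randn(8, 16))
torch.quantization.convert(static_model, inplace=True)

# 3. Quantization Aware Training: fake-quant modules simulate quantization
#    during training; values stay float32 but sit on the quantized grid.
qat_model = MyModel().train()
qat_model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
torch.quantization.prepare_qat(qat_model, inplace=True)
# ... fine-tune qat_model as usual, then:
int8_model = torch.quantization.convert(qat_model.eval())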
Building and Installing FBGEMM
First make Intel MKL visible to the build (the oneAPI paths below are from the original author's machine; adjust them to your install):

export LD_LIBRARY_PATH=/home/xywang/intel/oneapi/mkl/2021.2.0/lib/intel64/:$LD_LIBRARY_PATH
export PATH=/home/xywang/intel/oneapi/mkl/2021.2.0/bin:$PATH

Then clone and build FBGEMM:

git clone --recursive https://github.com/pytorch/FBGEMM.git
cd FBGEMM
mkdir build && cd build
cmake ..
make

To actually use FBGEMM as the quantized engine, torch itself apparently has to be built from source as well; otherwise you get the error: quantized engine FBGEMM is not supported
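Before running anything, it is worth sanity-checking which quantized engines the installed torch binary actually supports; torch.backends.quantized exposes this directly:

import torch

# Engines compiled into this torch build, e.g. ['none', 'fbgemm', 'qnnpack']
print(torch.backends.quantized.supported_engines)

# Selecting an engine that is not in the list raises the
# "quantized engine ... is not supported" error mentioned above.
if 'fbgemm' in torch.backends.quantized.supported_engines:
    torch.backends.quantized.engine = 'fbgemm'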
Speed Test Demo
A quick speed test. With qnnpack as the backend it may be fairly slow (the qnnpack/NNPACK kernels mainly target ARM mobile CPUs, while fbgemm is the x86 backend), and it warns that the hardware is unsupported:

[W NNPACK.cpp:80] Could not initialize NNPACK! Reason: Unsupported hardware.
import os
import time

import torch
import torch.nn as nn
from torch.quantization import QuantStub, DeQuantStub

backend = 'qnnpack'
# backend = 'fbgemm'
torch.backends.quantized.engine = backend


class DownBlockQ(nn.Module):

    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.quant_input = QuantStub()
        self.dequant_output = DeQuantStub()
        # Depthwise stride-2 conv followed by a pointwise conv
        self.conv1 = nn.Conv2d(in_ch, in_ch, 4, stride=2, padding=1, groups=in_ch)
        self.bn1 = nn.BatchNorm2d(in_ch)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(in_ch, out_ch, 1)
        self.bn2 = nn.BatchNorm2d(out_ch)
        self.relu2 = nn.ReLU()

    def forward(self, x):
        # x = self.quant_input(x)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        # x = self.dequant_output(x)
        return x

    def fuse_model(self):
        torch.quantization.fuse_modules(self, ['conv1', 'bn1', 'relu1'], inplace=True)
        torch.quantization.fuse_modules(self, ['conv2', 'bn2', 'relu2'], inplace=True)


class Model(nn.Module):

    def __init__(self, filters=22):
        super().__init__()
        self.quant_input = QuantStub()
        self.dequant_output = DeQuantStub()
        self.db1 = DownBlockQ(filters * 1, filters * 2)  # 128
        self.db2 = DownBlockQ(filters * 2, filters * 4)  # 64
        self.db3 = DownBlockQ(filters * 4, filters * 8)  # 32

    def forward(self, x):
        x = self.quant_input(x)
        x = self.db1(x)
        x = self.db2(x)
        x = self.db3(x)
        x = self.dequant_output(x)
        return x


def fuse_model(model):
    # Recursively fuse every submodule that defines its own fuse_model()
    if hasattr(model, 'fuse_model'):
        model.fuse_model()
    for p in list(model.modules())[1:]:
        fuse_model(p)


def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p") / 1e6)
    os.remove('temp.p')


def benchmark(func, iters=10, *args):
    t1 = time.time()
    for _ in range(iters):
        res = func(*args)
    print(f'{((time.time() - t1) / iters):.6f} sec')
    return res


def quantize():
    dummy = torch.rand(1, 22, 256, 256)
    # model = DownBlockQ(22 * 1, 22 * 2)
    model = Model(filters=22)
    model = model.eval()
    print("Before quantization")
    print_size_of_model(model)
    benchmark(model, 20, dummy)
    # print(model)
    fuse_model(model)
    model.qconfig = torch.quantization.get_default_qconfig(backend)
    # print(model.qconfig)
    # NB: no calibration data is fed between prepare() and convert(),
    # which is fine for a pure size/speed test but not for accuracy.
    torch.quantization.prepare(model, inplace=True)
    torch.quantization.convert(model, inplace=True)
    # print(model)
    print("After quantization")
    print_size_of_model(model)
    benchmark(model, 20, dummy)
    # torch.jit.script(model).save('models/model_scripted.pt')


if __name__ == '__main__':
    quantize()

Quantization-Aware Training CIFAR-10 Classification Demo
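The post is truncated before any training code appears, so as a bridge, here is a rough sketch of the QAT recipe the resnet.py file below is built for. Everything in it is an assumption rather than the author's code: the resnet18 factory comes from the file below, fuse_resnet_modules is a hypothetical helper (sketched after the model code), the hyperparameters are placeholders, and the calls assume the eager-mode torch.quantization API of the torch 1.7 / torchvision 0.8 era used throughout this post.

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as T

# from resnet import resnet18  # the quantizable ResNet defined below


def train_qat(model, epochs=1, lr=1e-3):
    trainset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=True, transform=T.ToTensor())
    loader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)

    # Fuse while in train mode so Conv+BN become trainable intrinsic modules,
    # then insert fake-quant observers. fuse_resnet_modules is a hypothetical
    # helper, sketched after the model code.
    model.train()
    fuse_resnet_modules(model)
    model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
    torch.quantization.prepare_qat(model, inplace=True)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    for _ in range(epochs):
        for x, y in loader:
            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()

    # Convert the fine-tuned fake-quant model into a true int8 model (CPU).
    model.eval()
    return torch.quantization.convert(model)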
# resnet.py
# Modified from
# https://github.com/pytorch/vision/blob/release/0.8.0/torchvision/models/resnet.py
import torch
from torch import Tensor
import torch.nn as nn
from torchvision.models.utils import load_state_dict_from_url
from typing import Type, Any, Callable, Union, List, Optional

__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
           'wide_resnet50_2', 'wide_resnet101_2']

model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
    'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
    'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
    'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
    'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}


def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1,
            dilation: int = 1) -> nn.Conv2d:
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False, dilation=dilation)


def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class BasicBlock(nn.Module):
    expansion: int = 1

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        # Renamed relu to relu1
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride
        self.skip_add = nn.quantized.FloatFunctional()
        # Remember to use two independent ReLUs for layer fusion.
        self.relu2 = nn.ReLU(inplace=True)

    def forward(self, x: Tensor) -> Tensor:
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        # Use FloatFunctional for the addition, for quantization compatibility
        # out += identity
        # out = torch.add(identity, out)
        out = self.skip_add.add(identity, out)
        out = self.relu2(out)
        return out


class Bottleneck(nn.Module):
    # Bottleneck in torchvision places the stride for downsampling at the 3x3
    # convolution (self.conv2), while the original implementation places it at
    # the first 1x1 convolution (self.conv1), according to "Deep residual
    # learning for image recognition" https://arxiv.org/abs/1512.03385.
    # This variant is also known as ResNet V1.5 and improves accuracy according to
    # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
    expansion: int = 4

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu1 = nn.ReLU(inplace=True)
        # The original post defined only relu1 and relu2 here, but the forward
        # pass uses three activations; each needs its own module for fusion.
        self.relu2 = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.skip_add = nn.quantized.FloatFunctional()
        self.relu3 = nn.ReLU(inplace=True)

    def forward(self, x: Tensor) -> Tensor:
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu2(out)
        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        # The source text is truncated here; the remainder mirrors BasicBlock:
        out = self.skip_add.add(identity, out)
        out = self.relu3(out)
        return out
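The section is cut off at this point. For reference, a fusion helper consistent with the relu1/relu2/relu3 naming above might look like the following; it is inferred from the block definitions, not taken from the original post. Note the post-add activation cannot be fused with the preceding conv/bn because the skip-connection add sits between them.

import torch


def fuse_resnet_modules(model):
    # Fuse the conv/bn(/relu) groups inside each residual block; the post-add
    # ReLU (relu2 in BasicBlock, relu3 in Bottleneck) stays unfused.
    for m in model.modules():
        if isinstance(m, BasicBlock):
            torch.quantization.fuse_modules(
                m, [['conv1', 'bn1', 'relu1'], ['conv2', 'bn2']], inplace=True)
        elif isinstance(m, Bottleneck):
            torch.quantization.fuse_modules(
                m, [['conv1', 'bn1', 'relu1'], ['conv2', 'bn2', 'relu2'],
                    ['conv3', 'bn3']], inplace=True)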