Summary of PyTorch Quantization Methods
Quantization Methods
1. Post Training Dynamic Quantization: the simplest quantization method. "Post Training" means quantization happens after the floating-point model has finished training and converged. The weights are quantized ahead of time, while the activations are quantized dynamically during inference: on every forward pass, each layer computes a fresh scale and zero_point from the actual range of its floating-point data and then quantizes;
2. Post Training Static Quantization: the first method is not very common. What people usually mean by "Post Training Quantization" is this static variant, and it is the most widely used one. The weights are quantized ahead of time just as above, and the activations are quantized using the fixed scale and zero_point recorded during a prior calibration step, so no quantization parameters (scale and zero_point) are recomputed at any point;
3. Quantization Aware Training: when the train-then-quantize flow loses too much accuracy, quantization aware training is needed. The quantization process is simulated during training: the values are still represented as float32, but the spacing between the actual representable values is constrained by the quantization parameters. A minimal API sketch of all three methods follows this list.
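To make the three methods concrete, here is a minimal eager-mode sketch of each API path. This is a sketch, not code from the original post: MyModel and the fake calibration data are placeholders invented for illustration, and 'fbgemm' in the qconfig calls is an assumption (swap in 'qnnpack' to match your build).

import torch
import torch.nn as nn
from torch.quantization import QuantStub, DeQuantStub


class MyModel(nn.Module):
    # Tiny placeholder model, just to keep the sketch self-contained.
    def __init__(self):
        super().__init__()
        self.quant = QuantStub()
        self.fc = nn.Linear(16, 4)
        self.dequant = DeQuantStub()

    def forward(self, x):
        return self.dequant(self.fc(self.quant(x)))


# 1. Post Training Dynamic Quantization: weights quantized ahead of time,
#    activation scale/zero_point recomputed on every forward pass.
dynamic_model = torch.quantization.quantize_dynamic(
    MyModel().eval(), {nn.Linear}, dtype=torch.qint8)

# 2. Post Training Static Quantization: one calibration pass records the
#    activation ranges; scale/zero_point are then frozen at convert time.
static_model = MyModel().eval()
static_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')
torch.quantization.prepare(static_model, inplace=True)
for _ in range(4):                      # fake calibration data
    static_model(torch.randn(8, 16))
torch.quantization.convert(static_model, inplace=True)

# 3. Quantization Aware Training: fake-quant modules simulate quantization
#    during training; values stay float32 but sit on the quantized grid.
qat_model = MyModel().train()
qat_model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
torch.quantization.prepare_qat(qat_model, inplace=True)
# ... fine-tune qat_model as usual, then:
int8_model = torch.quantization.convert(qat_model.eval())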
Building and Installing FBGEMM
First make Intel MKL visible to the build (the oneAPI paths below are from the original author's machine; adjust them to your install):

export LD_LIBRARY_PATH=/home/xywang/intel/oneapi/mkl/2021.2.0/lib/intel64/:$LD_LIBRARY_PATH
export PATH=/home/xywang/intel/oneapi/mkl/2021.2.0/bin:$PATH

Then clone and build FBGEMM:

git clone --recursive https://github.com/pytorch/FBGEMM.git
cd FBGEMM
mkdir build && cd build
cmake ..
make

To actually use FBGEMM as the quantized engine, torch itself apparently has to be built from source as well; otherwise you get the error: quantized engine FBGEMM is not supported
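Before running anything, it is worth sanity-checking which quantized engines the installed torch binary actually supports; torch.backends.quantized exposes this directly:

import torch

# Engines compiled into this torch build, e.g. ['none', 'fbgemm', 'qnnpack']
print(torch.backends.quantized.supported_engines)

# Selecting an engine that is not in the list raises the
# "quantized engine ... is not supported" error mentioned above.
if 'fbgemm' in torch.backends.quantized.supported_engines:
    torch.backends.quantized.engine = 'fbgemm'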
Speed Test Demo
A quick speed test. With qnnpack as the backend it may be fairly slow (the qnnpack/NNPACK kernels mainly target ARM mobile CPUs, while fbgemm is the x86 backend), and it warns that the hardware is unsupported:

[W NNPACK.cpp:80] Could not initialize NNPACK! Reason: Unsupported hardware.
import os
import time

import torch
import torch.nn as nn
from torch.quantization import QuantStub, DeQuantStub

backend = 'qnnpack'
# backend = 'fbgemm'
torch.backends.quantized.engine = backend


class DownBlockQ(nn.Module):

    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.quant_input = QuantStub()
        self.dequant_output = DeQuantStub()
        # Depthwise stride-2 conv followed by a pointwise conv
        self.conv1 = nn.Conv2d(in_ch, in_ch, 4, stride=2, padding=1, groups=in_ch)
        self.bn1 = nn.BatchNorm2d(in_ch)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(in_ch, out_ch, 1)
        self.bn2 = nn.BatchNorm2d(out_ch)
        self.relu2 = nn.ReLU()

    def forward(self, x):
        # x = self.quant_input(x)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        # x = self.dequant_output(x)
        return x

    def fuse_model(self):
        torch.quantization.fuse_modules(self, ['conv1', 'bn1', 'relu1'], inplace=True)
        torch.quantization.fuse_modules(self, ['conv2', 'bn2', 'relu2'], inplace=True)


class Model(nn.Module):

    def __init__(self, filters=22):
        super().__init__()
        self.quant_input = QuantStub()
        self.dequant_output = DeQuantStub()
        self.db1 = DownBlockQ(filters * 1, filters * 2)  # 128
        self.db2 = DownBlockQ(filters * 2, filters * 4)  # 64
        self.db3 = DownBlockQ(filters * 4, filters * 8)  # 32

    def forward(self, x):
        x = self.quant_input(x)
        x = self.db1(x)
        x = self.db2(x)
        x = self.db3(x)
        x = self.dequant_output(x)
        return x


def fuse_model(model):
    # Recursively fuse every submodule that defines its own fuse_model()
    if hasattr(model, 'fuse_model'):
        model.fuse_model()
    for p in list(model.modules())[1:]:
        fuse_model(p)


def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p") / 1e6)
    os.remove('temp.p')


def benchmark(func, iters=10, *args):
    t1 = time.time()
    for _ in range(iters):
        res = func(*args)
    print(f'{((time.time() - t1) / iters):.6f} sec')
    return res


def quantize():
    dummy = torch.rand(1, 22, 256, 256)
    # model = DownBlockQ(22 * 1, 22 * 2)
    model = Model(filters=22)
    model = model.eval()
    print("Before quantization")
    print_size_of_model(model)
    benchmark(model, 20, dummy)
    # print(model)
    fuse_model(model)
    model.qconfig = torch.quantization.get_default_qconfig(backend)
    # print(model.qconfig)
    # NB: no calibration data is fed between prepare() and convert(),
    # which is fine for a pure size/speed test but not for accuracy.
    torch.quantization.prepare(model, inplace=True)
    torch.quantization.convert(model, inplace=True)
    # print(model)
    print("After quantization")
    print_size_of_model(model)
    benchmark(model, 20, dummy)
    # torch.jit.script(model).save('models/model_scripted.pt')


if __name__ == '__main__':
    quantize()

Quantization-Aware Training CIFAR-10 Classification Demo
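The post is truncated before any training code appears, so as a bridge, here is a rough sketch of the QAT recipe the resnet.py file below is built for. Everything in it is an assumption rather than the author's code: the resnet18 factory comes from the file below, fuse_resnet_modules is a hypothetical helper (sketched after the model code), the hyperparameters are placeholders, and the calls assume the eager-mode torch.quantization API of the torch 1.7 / torchvision 0.8 era used throughout this post.

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as T

# from resnet import resnet18  # the quantizable ResNet defined below


def train_qat(model, epochs=1, lr=1e-3):
    trainset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=True, transform=T.ToTensor())
    loader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)

    # Fuse while in train mode so Conv+BN become trainable intrinsic modules,
    # then insert fake-quant observers. fuse_resnet_modules is a hypothetical
    # helper, sketched after the model code.
    model.train()
    fuse_resnet_modules(model)
    model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
    torch.quantization.prepare_qat(model, inplace=True)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    for _ in range(epochs):
        for x, y in loader:
            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()

    # Convert the fine-tuned fake-quant model into a true int8 model (CPU).
    model.eval()
    return torch.quantization.convert(model)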
# resnet.py
# Modified from
# https://github.com/pytorch/vision/blob/release/0.8.0/torchvision/models/resnet.py
import torch
from torch import Tensor
import torch.nn as nn
from torchvision.models.utils import load_state_dict_from_url
from typing import Type, Any, Callable, Union, List, Optional

__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
           'wide_resnet50_2', 'wide_resnet101_2']

model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
    'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
    'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
    'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
    'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}


def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1,
            dilation: int = 1) -> nn.Conv2d:
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=dilation, groups=groups, bias=False, dilation=dilation)


def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class BasicBlock(nn.Module):
    expansion: int = 1

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        # Renamed relu to relu1
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride
        self.skip_add = nn.quantized.FloatFunctional()
        # Remember to use two independent ReLUs for layer fusion.
        self.relu2 = nn.ReLU(inplace=True)

    def forward(self, x: Tensor) -> Tensor:
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        # Use FloatFunctional for the addition, for quantization compatibility
        # out += identity
        # out = torch.add(identity, out)
        out = self.skip_add.add(identity, out)
        out = self.relu2(out)
        return out


class Bottleneck(nn.Module):
    # Bottleneck in torchvision places the stride for downsampling at the 3x3
    # convolution (self.conv2), while the original implementation places it at
    # the first 1x1 convolution (self.conv1), according to "Deep residual
    # learning for image recognition" https://arxiv.org/abs/1512.03385.
    # This variant is also known as ResNet V1.5 and improves accuracy according to
    # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
    expansion: int = 4

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu1 = nn.ReLU(inplace=True)
        # The original post defined only relu1 and relu2 here, but the forward
        # pass uses three activations; each needs its own module for fusion.
        self.relu2 = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.skip_add = nn.quantized.FloatFunctional()
        self.relu3 = nn.ReLU(inplace=True)

    def forward(self, x: Tensor) -> Tensor:
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu2(out)
        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        # The source text is truncated here; the remainder mirrors BasicBlock:
        out = self.skip_add.add(identity, out)
        out = self.relu3(out)
        return out
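The section is cut off at this point. For reference, a fusion helper consistent with the relu1/relu2/relu3 naming above might look like the following; it is inferred from the block definitions, not taken from the original post. Note the post-add activation cannot be fused with the preceding conv/bn because the skip-connection add sits between them.

import torch


def fuse_resnet_modules(model):
    # Fuse the conv/bn(/relu) groups inside each residual block; the post-add
    # ReLU (relu2 in BasicBlock, relu3 in Bottleneck) stays unfused.
    for m in model.modules():
        if isinstance(m, BasicBlock):
            torch.quantization.fuse_modules(
                m, [['conv1', 'bn1', 'relu1'], ['conv2', 'bn2']], inplace=True)
        elif isinstance(m, Bottleneck):
            torch.quantization.fuse_modules(
                m, [['conv1', 'bn1', 'relu1'], ['conv2', 'bn2', 'relu2'],
                    ['conv3', 'bn3']], inplace=True)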