Summary of PyTorch Quantization Methods

Quantization methods
1. Post Training Dynamic Quantization: this is the simplest method. "Post training" means quantization is applied after the floating-point model has finished training and converged. Weights are quantized ahead of time, while activations are quantized dynamically during forward inference: at every run, each layer computes a scale and zero_point from the actual range of the floating-point data and then quantizes it (a minimal sketch follows this list);
2. Post Training Static Quantization: the first method is not very common; when people say "post training quantization" they usually mean this static variant, which is also the most widely used. Weights are pre-quantized as above, and activations are quantized with fixed scale and zero_point values recorded during a prior calibration step, so no quantization parameters (scale and zero_point) are recomputed during inference;
3. Quantization Aware Training: when the float-train-then-quantize pipeline loses too much accuracy, quantization-aware training is needed. Quantization is simulated during training: the values are still stored as float32, but they are constrained to the discrete grid implied by the quantization parameters.
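For the first method, here is a minimal sketch of dynamic quantization on a toy model (the model itself is made up for illustration; the set of module types to quantize is passed explicitly):

import torch
import torch.nn as nn

# A toy float model, assumed already trained and switched to eval mode
model_fp32 = nn.Sequential(nn.Linear(64, 128), nn.ReLU(), nn.Linear(128, 10)).eval()

# Weights are quantized to int8 up front; activations are quantized on the fly,
# so their scale/zero_point are recomputed at every forward pass
model_int8 = torch.quantization.quantize_dynamic(
    model_fp32, {nn.Linear}, dtype=torch.qint8)

print(model_int8(torch.rand(1, 64)).shape)

The static and quantization-aware workflows are demonstrated by the demos later in this post.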
Compiling and installing FBGEMM

1. Set the Intel oneAPI MKL environment variables:

export LD_LIBRARY_PATH=/home/xywang/intel/oneapi/mkl/2021.2.0/lib/intel64/:$LD_LIBRARY_PATH
export PATH=/home/xywang/intel/oneapi/mkl/2021.2.0/bin:$PATH

2. Clone and build FBGEMM:

git clone --recursive https://github.com/pytorch/FBGEMM.git
cd FBGEMM
mkdir build && cd build
cmake ..
make

3. To use FBGEMM as the quantized engine, torch itself apparently has to be compiled from source with FBGEMM support; otherwise you get the error: quantized engine FBGEMM is not supported

Speed test demo

A quick speed test. With qnnpack as the backend it may be fairly slow, because a warning tells you the hardware is not supported:

[W NNPACK.cpp:80] Could not initialize NNPACK! Reason: Unsupported hardware.
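Related to the "quantized engine FBGEMM is not supported" error above, it can be worth checking which quantized engines the installed torch build actually supports before picking a backend. A small sketch using the standard torch.backends.quantized attributes:

import torch

# Engines compiled into this torch build, e.g. ['none', 'fbgemm', 'qnnpack']
print(torch.backends.quantized.supported_engines)

# Only select a backend that the build actually supports; selecting an
# unsupported one is what raises "quantized engine ... is not supported"
if 'fbgemm' in torch.backends.quantized.supported_engines:
    torch.backends.quantized.engine = 'fbgemm'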
import os
import time
import torch
import torch.nn as nn
from torch.quantization import QuantStub, DeQuantStub

backend = 'qnnpack'
# backend = 'fbgemm'

torch.backends.quantized.engine = backend


class DownBlockQ(nn.Module):
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.quant_input = QuantStub()
        self.dequant_output = DeQuantStub()
        self.conv1 = nn.Conv2d(in_ch, in_ch, 4, stride=2, padding=1, groups=in_ch)
        self.bn1 = nn.BatchNorm2d(in_ch)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(in_ch, out_ch, 1)
        self.bn2 = nn.BatchNorm2d(out_ch)
        self.relu2 = nn.ReLU()
    def forward(self, x):
        # x = self.quant_input(x)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        # x = self.dequant_output(x)
        return x

    def fuse_model(self):
        torch.quantization.fuse_modules(self, ['conv1', 'bn1', 'relu1'], inplace=True)
        torch.quantization.fuse_modules(self, ['conv2', 'bn2', 'relu2'], inplace=True)


class Model(nn.Module):
    def __init__(self, filters=22):
        super().__init__()
        self.quant_input = QuantStub()
        self.dequant_output = DeQuantStub()
        self.db1 = DownBlockQ(filters * 1, filters * 2)  # 128
        self.db2 = DownBlockQ(filters * 2, filters * 4)  # 64
        self.db3 = DownBlockQ(filters * 4, filters * 8)  # 32

    def forward(self, x):
        x = self.quant_input(x)
        x = self.db1(x)
        x = self.db2(x)
        x = self.db3(x)
        x = self.dequant_output(x)
        return x


def fuse_model(model):
    if hasattr(model, 'fuse_model'):
        model.fuse_model()
    for p in list(model.modules())[1:]:
        fuse_model(p)


def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p") / 1e6)
    os.remove('temp.p')


def benchmark(func, iters=10, *args):
    t1 = time.time()
    for _ in range(iters):
        res = func(*args)
    print(f'{((time.time() - t1) / iters):.6f} sec')
    return res


def quantize():
    dummy = torch.rand(1, 22, 256, 256)
    # model = DownBlockQ(22 * 1, 22 * 2)
    model = Model(filters=22)
    model = model.eval()
    print("Before quantization")
    print_size_of_model(model)
    benchmark(model, 20, dummy)
    # print(model)

    fuse_model(model)
    model.qconfig = torch.quantization.get_default_qconfig(backend)
    # print(model.qconfig)
    torch.quantization.prepare(model, inplace=True)
    torch.quantization.convert(model, inplace=True)
    # print(model)

    print("After quantization")
    print_size_of_model(model)
    benchmark(model, 20, dummy)
    # torch.jit.script(model).save('models/model_scripted.pt')


if __name__ == '__main__':
    quantize()
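One caveat about the demo above: torch.quantization.prepare() only inserts observers, and for real post-training static quantization you would normally run representative data through the prepared model so the observers can record activation ranges before convert() is called. The demo converts immediately, which is fine for a speed test but not for accuracy. A hedged sketch of such a calibration pass, with random tensors standing in for a real calibration set:

import torch

def calibrate(prepared_model, num_batches=10):
    # Feed representative inputs through the observer-equipped model so the
    # activation scale/zero_point can be recorded before conversion.
    # The input shape matches the demo's dummy tensor.
    prepared_model.eval()
    with torch.no_grad():
        for _ in range(num_batches):
            prepared_model(torch.rand(1, 22, 256, 256))

A call like calibrate(model) would go between torch.quantization.prepare() and torch.quantization.convert() in quantize() above.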
Quantization-aware training demo: CIFAR-10 classification

# resnet.py
# Modified from
# https://github.com/pytorch/vision/blob/release/0.8.0/torchvision/models/resnet.py
import torch
from torch import Tensor
import torch.nn as nn
from torchvision.models.utils import load_state_dict_from_url
from typing import Type, Any, Callable, Union, List, Optional

__all__ = [
    'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152',
    'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2',
    'wide_resnet101_2'
]

model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
    'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
    'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
    'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
    'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}


def conv3x3(in_planes: int,
            out_planes: int,
            stride: int = 1,
            groups: int = 1,
            dilation: int = 1) -> nn.Conv2d:
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes,
                     out_planes,
                     kernel_size=3,
                     stride=stride,
                     padding=dilation,
                     groups=groups,
                     bias=False,
                     dilation=dilation)


def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """1x1 convolution"""
    return nn.Conv2d(in_planes,
                     out_planes,
                     kernel_size=1,
                     stride=stride,
                     bias=False)


class BasicBlock(nn.Module):
    expansion: int = 1

    def __init__(
            self,
            inplanes: int,
            planes: int,
            stride: int = 1,
            downsample: Optional[nn.Module] = None,
            groups: int = 1,
            base_width: int = 64,
            dilation: int = 1,
            norm_layer: Optional[Callable[..., nn.Module]] = None) -> None:
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError(
                'BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError(
                "Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        # Rename relu to relu1
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride
        self.skip_add = nn.quantized.FloatFunctional()
        # Remember to use two independent ReLUs for layer fusion.
        self.relu2 = nn.ReLU(inplace=True)

    def forward(self, x: Tensor) -> Tensor:
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        # Use FloatFunctional for the addition for quantization compatibility
        # out += identity
        # out = torch.add(identity, out)
        out = self.skip_add.add(identity, out)
        out = self.relu2(out)

        return out


class Bottleneck(nn.Module):
    # Bottleneck in torchvision places the stride for downsampling at the 3x3 convolution (self.conv2)
    # while the original implementation places the stride at the first 1x1 convolution (self.conv1)
    # according to "Deep residual learning for image recognition" https://arxiv.org/abs/1512.03385.
    # This variant is also known as ResNet V1.5 and improves accuracy according to
    # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.

    expansion: int = 4

    def __init__(
            self,
            inplanes: int,
            planes: int,
            stride: int = 1,
            downsample: Optional[nn.Module] = None,
            groups: int = 1,
            base_width: int = 64,
            dilation: int = 1,
            norm_layer: Optional[Callable[..., nn.Module]] = None) -> None:
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu1 = nn.ReLU(inplace=True)
        self.relu2 = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride
        self.skip_add = nn.quantized.FloatFunctional()
        # A third independent ReLU so conv/bn/relu fusion stays possible
        self.relu3 = nn.ReLU(inplace=True)

    def forward(self, x: Tensor) -> Tensor:
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu2(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.skip_add.add(identity, out)
        out = self.relu3(out)

        return out
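The listing is cut off at this point; the rest of resnet.py and the CIFAR-10 training script are missing from the page. As a rough, hedged sketch of the quantization-aware-training flow such a demo would typically follow (the model, its fuse_model() helper and the CIFAR-10 DataLoader are assumed, not taken from the original):

import torch

def train_qat(model, train_loader, num_epochs=5, backend='fbgemm'):
    # `model` is assumed to be a float model built from the modules above and
    # to expose a fuse_model() helper; `train_loader` is a CIFAR-10 DataLoader.
    if hasattr(model, 'fuse_model'):
        model.fuse_model()                                   # fuse conv+bn(+relu) blocks
    model.qconfig = torch.quantization.get_default_qat_qconfig(backend)
    torch.quantization.prepare_qat(model.train(), inplace=True)

    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
    criterion = torch.nn.CrossEntropyLoss()
    for _ in range(num_epochs):
        for images, labels in train_loader:                  # fake-quant ops simulate int8 here
            optimizer.zero_grad()
            criterion(model(images), labels).backward()
            optimizer.step()

    model.eval()
    return torch.quantization.convert(model)                 # real int8 model for inference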
