Deep Learning Quantization

Quantization Configuration

Quantization parameters are configured through a dictionary:

TENSORRT_OP_TYPES = [
    'mul', 'conv2d', 'pool2d', 'depthwise_conv2d', 'elementwise_add',
    'leaky_relu'
]

TRANSFORM_PASS_OP_TYPES = ['conv2d', 'depthwise_conv2d', 'mul']

 

QUANT_DEQUANT_PASS_OP_TYPES = [
    "pool2d", "elementwise_add", "concat", "softmax", "argmax", "transpose",
    "equal", "gather", "greater_equal", "greater_than", "less_equal",
    "less_than", "mean", "not_equal", "reshape", "reshape2",
    "bilinear_interp", "nearest_interp", "trilinear_interp", "slice",
    "squeeze", "elementwise_sub", "relu", "relu6", "leaky_relu", "tanh", "swish"
]

 

_quant_config_default = {
    # weight quantize type, default is 'channel_wise_abs_max'
    'weight_quantize_type': 'channel_wise_abs_max',
    # activation quantize type, default is 'moving_average_abs_max'
    'activation_quantize_type': 'moving_average_abs_max',
    # weight quantize bit num, default is 8
    'weight_bits': 8,
    # activation quantize bit num, default is 8
    'activation_bits': 8,
    # ops whose name_scope matches an entry in not_quant_pattern will not be quantized
    'not_quant_pattern': ['skip_quant'],
    # ops whose type is in quantize_op_types will be quantized
    'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
    # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
    'dtype': 'int8',
    # window size for 'range_abs_max' quantization, default is 10000
    'window_size': 10000,
    # the decay coefficient of moving average, default is 0.9
    'moving_rate': 0.9,
    # if True, 'quantize_op_types' will be TENSORRT_OP_TYPES
    'for_tensorrt': False,
    # if True, 'quantize_op_types' will be TRANSFORM_PASS_OP_TYPES + QUANT_DEQUANT_PASS_OP_TYPES
    'is_full_quantize': False
}
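A user-supplied config only needs the keys that differ from the defaults; the remaining keys keep the values shown above. Below is a minimal sketch of that merge (user_config and the merge expression are illustrative, not part of the PaddleSlim API):

# Hypothetical user config: only override what differs from the defaults.
user_config = {'weight_quantize_type': 'abs_max', 'weight_bits': 4}

# Merge: user-supplied keys take precedence over _quant_config_default.
config = dict(_quant_config_default, **user_config)

assert config['weight_quantize_type'] == 'abs_max'  # overridden
assert config['activation_bits'] == 8               # default kept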


quant_aware

paddleslim.quant.quant_aware(program, place, config, scope=None, for_test=False)

Inserts quantization and dequantization ops into the program, for quantization-aware training.

Parameters:
- program (fluid.Program): the program to insert quantization ops into.
- place (fluid.CPUPlace | fluid.CUDAPlace): the device on which newly created parameters are initialized.
- config (dict): the quantization configuration described above.
- scope (fluid.Scope, optional): the scope holding the parameters; fluid.global_scope() is used when None.
- for_test (bool): set True for an evaluation/inference program, False for a training program.

Returns:
A program containing quantization and dequantization operators.

Return type:
fluid.CompiledProgram | fluid.Program

convert

paddleslim.quant.convert(program, place, config, scope=None, save_int8=False)

Converts a trained quantization program into a program that can be saved as an inference model.

Parameters:
- program (fluid.Program): the quantized evaluation program returned by quant_aware with for_test=True.
- place (fluid.CPUPlace | fluid.CUDAPlace): the device.
- config (dict): the same quantization configuration passed to quant_aware.
- scope (fluid.Scope, optional): fluid.global_scope() is used when None.
- save_int8 (bool): if True, additionally return a program whose weights are stored as int8.

Returns:
The frozen program, ready to be saved as an inference model. When save_int8 is True, a second program with int8 weights is also returned.

Note

Because this API deletes and modifies ops and Variables, it can only be called after training is finished. To convert a model from the middle of training, load the corresponding parameters first and then call this API, as sketched below.
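For instance, reusing the names from the code example below, reloading a mid-training checkpoint before conversion might look like this (a hedged sketch; the checkpoint directory is hypothetical, and fluid.io.load_persistables is the standard fluid API for restoring persistable variables):

# Hypothetical directory holding persistables saved mid-training.
ckpt_dir = './quant_ckpt'

# Restore the parameters into the quantized eval program, then convert.
fluid.io.load_persistables(exe, ckpt_dir, main_program=quant_eval_program)
inference_prog = quant.convert(quant_eval_program, place, config)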

Code example

#encoding=utf8
import paddle.fluid as fluid
import paddleslim.quant as quant

train_program = fluid.Program()

with fluid.program_guard(train_program):
    image = fluid.data(name='x', shape=[None, 1, 28, 28])
    label = fluid.data(name='label', shape=[None, 1], dtype='int64')
    conv = fluid.layers.conv2d(image, 32, 1)
    feat = fluid.layers.fc(conv, 10, act='softmax')
    cost = fluid.layers.cross_entropy(input=feat, label=label)
    avg_cost = fluid.layers.mean(x=cost)

use_gpu = True
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
eval_program = train_program.clone(for_test=True)

# quantization configuration
config = {'weight_quantize_type': 'abs_max',
          'activation_quantize_type': 'moving_average_abs_max'}
build_strategy = fluid.BuildStrategy()
exec_strategy = fluid.ExecutionStrategy()

# call the API to insert quant/dequant ops
quant_train_program = quant.quant_aware(train_program, place, config, for_test=False)
quant_eval_program = quant.quant_aware(eval_program, place, config, for_test=True)

# disable strategies that conflict with quantization training
build_strategy.fuse_all_reduce_ops = False
build_strategy.sync_batch_norm = False
quant_train_program = quant_train_program.with_data_parallel(
    loss_name=avg_cost.name,
    build_strategy=build_strategy,
    exec_strategy=exec_strategy)

inference_prog = quant.convert(quant_eval_program, place, config)
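A natural follow-up is persisting inference_prog with fluid.io.save_inference_model. A hedged sketch, continuing the example above (the output directory is hypothetical, and the output variable is looked up by name so that it belongs to the converted program):

# Look up the network output inside the converted program by its name.
out_var = inference_prog.global_block().var(feat.name)

fluid.io.save_inference_model(
    dirname='./quant_inference_model',  # hypothetical output directory
    feeded_var_names=['x'],             # input name defined in the example
    target_vars=[out_var],
    executor=exe,
    main_program=inference_prog)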

For more detailed usage, see the quantization-aware training demo.

quant_post

paddleslim.quant.quant_post(executor, model_dir, quantize_model_path, sample_generator, model_filename=None, params_filename=None, batch_size=16, batch_nums=None, scope=None, algo='KL', quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"], is_full_quantize=False, weight_bits=8, activation_bits=8, is_use_cache_file=False, cache_dir="./temp_post_training")

 

Quantizes the model saved under ${model_dir}, using data produced by sample_generator to calibrate the quantization parameters.

Parameters:
- executor (fluid.Executor): the executor used to run calibration.
- model_dir (str): directory containing the model to be quantized.
- quantize_model_path (str): directory where the quantized model will be saved.
- sample_generator (Python generator): yields the calibration samples.
- model_filename / params_filename (str, optional): file names to use when the model or its parameters are stored as single files.
- batch_size (int): calibration batch size, default 16.
- batch_nums (int, optional): number of calibration batches; when None, all data from sample_generator is used.
- scope (fluid.Scope, optional): fluid.global_scope() is used when None.
- algo (str): algorithm used to compute activation quantization scales, default 'KL'.
- quantizable_op_type (list[str]): op types to quantize, default ["conv2d", "depthwise_conv2d", "mul"].
- is_full_quantize (bool): if True, quantize all supported op types.
- weight_bits / activation_bits (int): quantization bit widths, default 8.
- is_use_cache_file (bool): if True, cache intermediate activation data on disk.
- cache_dir (str): directory for the cache files, default "./temp_post_training".

Returns:
None.

Code example

Warning

This example cannot be run directly, because it needs to load a model from ${model_dir}.

import paddle.fluid as fluid
import paddle.dataset.mnist as reader
from paddleslim.quant import quant_post

val_reader = reader.train()
use_gpu = True
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()

exe = fluid.Executor(place)
quant_post(
        executor=exe,
        model_dir='./model_path',
        quantize_model_path='./save_path',
        sample_generator=val_reader,
        model_filename='__model__',
        params_filename='__params__',
        batch_size=16,
        batch_nums=10)
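Once quant_post finishes, the quantized model under ./save_path can be loaded back like any inference model. A hedged sketch (whether model_filename/params_filename must be passed depends on how quant_post saved the files, and the MNIST-shaped dummy input is an assumption about the saved model):

import numpy as np

# Load the post-training-quantized model saved by quant_post above.
[inference_program, feed_names, fetch_targets] = fluid.io.load_inference_model(
    dirname='./save_path', executor=exe)

# Run one batch of dummy data shaped like MNIST inputs through it.
images = np.random.random((16, 1, 28, 28)).astype('float32')
outputs = exe.run(inference_program,
                  feed={feed_names[0]: images},
                  fetch_list=fetch_targets)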

quant_embedding

paddleslim.quant.quant_embedding(program, place, config, scope=None)

Quantizes Embedding parameters.

Parameters:
- program (fluid.Program): the program whose Embedding parameter is to be quantized.
- place (fluid.CPUPlace | fluid.CUDAPlace): the device.
- config (dict): configuration for Embedding quantization; 'params_name' (the name of the Embedding parameter) is required, and 'quantize_type' defaults to 'abs_max'.
- scope (fluid.Scope, optional): fluid.global_scope() is used when None.

Returns:
The quantized program.

Return type:
fluid.Program

Code example

import paddle.fluid as fluid
import paddleslim.quant as quant

train_program = fluid.Program()
with fluid.program_guard(train_program):
    input_word = fluid.data(name="input_word", shape=[None, 1], dtype='int64')
    input_emb = fluid.embedding(
        input=input_word,
        is_sparse=False,
        size=[100, 128],
        param_attr=fluid.ParamAttr(
            name='emb',
            initializer=fluid.initializer.Uniform(-0.005, 0.005)))

infer_program = train_program.clone(for_test=True)

use_gpu = True
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

config = {'params_name': 'emb', 'quantize_type': 'abs_max'}
quant_program = quant.quant_embedding(infer_program, place, config)
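A quick, hedged way to confirm the pass ran is to list the op types in the returned program; the exact names of the inserted quantize/dequantize ops vary across PaddleSlim versions:

# Print the op types of the quantized program; alongside the embedding lookup
# you should see the quantize/dequantize ops inserted by quant_embedding.
print([op.type for op in quant_program.global_block().ops])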

 
