Files
kunlun/export/inc/cnn/quantize_and_dequantize.h
2024-09-28 14:24:04 +08:00

146 lines
6.6 KiB
C
Executable File

/****************************************************************************
Copyright(c) 2019 by Aerospace C.Power (Chongqing) Microelectronics. ALL RIGHTS RESERVED.
This Information is proprietary to Aerospace C.Power (Chongqing) Microelectronics and MAY NOT
be copied by any method or incorporated into another program without
the express written consent of Aerospace C.Power. This Information or any portion
thereof remains the property of Aerospace C.Power. The Information contained herein
is believed to be accurate and Aerospace C.Power assumes no responsibility or
liability for its use in any way and conveys no license or title under
any patent or copyright and makes no representation or warranty that this
Information is free from patent or copyright infringement.
****************************************************************************/
#ifndef QUANTIZE_AND_DEQUANTIZE_H
#define QUANTIZE_AND_DEQUANTIZE_H
/* os shim includes */
#include "os_types_api.h"
#ifdef __cplusplus
extern "C" {
#endif
/** \defgroup quantize_and_dequantize_APIs quantize_and_dequantize API
* @brief quantize_and_dequantize APIs
* These functions quantize float numbers to 8-bit or 16-bit integers and
* dequantize 8-bit or 16-bit integers back to float numbers.
*
*/
/** @addtogroup quantize_and_dequantize_APIs
* @{
*/
/** @brief quantize_to_int8() - quantize a sequence of float numbers to int8.
*
* Each element is computed as
* output[i] = floor(clamp(input[i] * scale + bias, -128.0, 127.0)).
*
* @param input: pointer to input float numbers
* @param out: pointer to output int8 numbers
* @param length: the length of the input number sequence; input therefore occupies 4 * length bytes in ddr
* @param scale: constant multiplied with every input value
* @param bias: constant added after scaling
* @return 0 -- success
* @return 1 -- input address is not a multiple of 32
* @return 2 -- out address is not a multiple of 32
* @return 3 -- length is not a multiple of 8
*/
uint8_t quantize_to_int8(float *input, int8_t *out, uint32_t length, float scale, float bias);
/** @brief quantize_to_uint8() - quantize a sequence of float numbers to uint8.
*
* Each element is computed as
* output[i] = floor(clamp(input[i] * scale + bias, 0, 255.0)).
*
* @param input: pointer to input float numbers
* @param out: pointer to output uint8 numbers
* @param length: the length of the input number sequence; input therefore occupies 4 * length bytes in ddr
* @param scale: constant multiplied with every input value
* @param bias: constant added after scaling
* @return 0 -- success
* @return 1 -- input address is not a multiple of 32
* @return 2 -- out address is not a multiple of 32
* @return 3 -- length is not a multiple of 8
*/
uint8_t quantize_to_uint8(float *input, uint8_t *out, uint32_t length, float scale, float bias);
/** @brief quantize_to_int16() - quantize a sequence of float numbers to int16.
*
* Each element is computed as
* output[i] = floor(clamp(input[i] * scale + bias, -32768.0, 32767.0)).
*
* @param input: pointer to input float numbers
* @param out: pointer to output int16 numbers
* @param length: the length of the input number sequence; input therefore occupies 4 * length bytes in ddr
* @param scale: constant multiplied with every input value
* @param bias: constant added after scaling
* @return 0 -- success
* @return 1 -- input address is not a multiple of 32
* @return 2 -- out address is not a multiple of 32
* @return 3 -- length is not a multiple of 8
*/
uint8_t quantize_to_int16(float *input, int16_t *out, uint32_t length, float scale, float bias);
/** @brief quantize_to_uint16() - quantize a sequence of float numbers to uint16.
*
* Each element is computed as
* output[i] = floor(clamp(input[i] * scale + bias, 0, 65535.0)).
*
* @param input: pointer to input float numbers
* @param out: pointer to output uint16 numbers
* @param length: the length of the input number sequence; input therefore occupies 4 * length bytes in ddr
* @param scale: constant multiplied with every input value
* @param bias: constant added after scaling
* @return 0 -- success
* @return 1 -- input address is not a multiple of 32
* @return 2 -- out address is not a multiple of 32
* @return 3 -- length is not a multiple of 8
*/
uint8_t quantize_to_uint16(float *input, uint16_t *out, uint32_t length, float scale, float bias);
/** @brief dequantize_from_int8() - dequantize a sequence of int8 numbers to float.
*
* Each element is computed as output[i] = (input[i] + bias) * scale.
*
* @param input: pointer to input int8 numbers
* @param output: pointer to output float numbers
* @param length: the length of the input number sequence; output therefore occupies 4 * length bytes in ddr
* @param scale: constant multiplied after the bias is added
* @param bias: constant added to every input value before scaling
* @return 0 -- success
* @return 1 -- input address is not a multiple of 32
* @return 2 -- output address is not a multiple of 32
* @return 3 -- length is not a multiple of 8
*/
uint8_t dequantize_from_int8(int8_t *input, float *output, uint32_t length, float scale, float bias);
/** @brief dequantize_from_uint8() - dequantize a sequence of uint8 numbers to float.
*
* Each element is computed as output[i] = (input[i] + bias) * scale.
*
* @param input: pointer to input uint8 numbers
* @param output: pointer to output float numbers
* @param length: the length of the input number sequence; output therefore occupies 4 * length bytes in ddr
* @param scale: constant multiplied after the bias is added
* @param bias: constant added to every input value before scaling
* @return 0 -- success
* @return 1 -- input address is not a multiple of 32
* @return 2 -- output address is not a multiple of 32
* @return 3 -- length is not a multiple of 8
*/
uint8_t dequantize_from_uint8(uint8_t *input, float *output, uint32_t length, float scale, float bias);
/** @brief dequantize_from_int16() - dequantize a sequence of int16 numbers to float.
*
* Each element is computed as output[i] = (input[i] + bias) * scale.
*
* @param input: pointer to input int16 numbers
* @param output: pointer to output float numbers
* @param length: the length of the input number sequence; output therefore occupies 4 * length bytes in ddr
* @param scale: constant multiplied after the bias is added
* @param bias: constant added to every input value before scaling
* @return 0 -- success
* @return 1 -- input address is not a multiple of 32
* @return 2 -- output address is not a multiple of 32
* @return 3 -- length is not a multiple of 8
*/
uint8_t dequantize_from_int16(int16_t *input, float *output, uint32_t length, float scale, float bias);
/** @brief dequantize_from_uint16() - dequantize a sequence of uint16 numbers to float.
*
* Each element is computed as output[i] = (input[i] + bias) * scale.
*
* @param input: pointer to input uint16 numbers
* @param output: pointer to output float numbers
* @param length: the length of the input number sequence; output therefore occupies 4 * length bytes in ddr
* @param scale: constant multiplied after the bias is added
* @param bias: constant added to every input value before scaling
* @return 0 -- success
* @return 1 -- input address is not a multiple of 32
* @return 2 -- output address is not a multiple of 32
* @return 3 -- length is not a multiple of 8
*/
uint8_t dequantize_from_uint16(uint16_t *input, float *output, uint32_t length, float scale, float bias);
/**
* @}
*/
#ifdef __cplusplus
}
#endif
#endif