/****************************************************************************
 *
 * Copyright (c) 2019 by Aerospace C.Power (Chongqing) Microelectronics.
 * ALL RIGHTS RESERVED.
 *
 * This Information is proprietary to Aerospace C.Power (Chongqing)
 * Microelectronics and MAY NOT be copied by any method or incorporated into
 * another program without the express written consent of Aerospace C.Power.
 * This Information or any portion thereof remains the property of Aerospace
 * C.Power. The Information contained herein is believed to be accurate and
 * Aerospace C.Power assumes no responsibility or liability for its use in
 * any way and conveys no license or title under any patent or copyright and
 * makes no representation or warranty that this Information is free from
 * patent or copyright infringement.
 *
 ****************************************************************************/

#ifndef NN_FUNCTION_H
#define NN_FUNCTION_H

/* os shim includes */
#include "os_types.h"

#ifdef __cplusplus
extern "C" {
#endif

/** \defgroup linear_nn_APIs linear nn APIs
 * @brief linear nn APIs
 * Linear neural network functions, including FC (fully connected) and 1-D
 * convolution layers.
 */

/** @addtogroup linear_nn_APIs
 * @{
 */

struct conv1d_config_bean {
    uint32_t in_channel;     // number of input channels
    uint32_t in_length;      // length of the input
    uint32_t out_channel;    // number of output channels
    uint32_t out_length;     // length of the output, set after the conv calculation completes
    uint32_t kernel_length;  // length of the conv kernel
    uint32_t stride;         // stride of the conv
    uint32_t dilation;       // dilation of the conv
    uint32_t padding_left;   // padding on the left
    uint32_t padding_right;  // padding on the right
    uint32_t group;          // number of groups of the conv
};
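
/*
 * Illustrative sketch (not part of the API): how a caller might fill the
 * config bean and precompute the expected out_length using the out_length
 * formula from the conv1d function docs below. All field values here are
 * placeholders chosen for the example.
 *
 *   struct conv1d_config_bean bean = {
 *       .in_channel = 8, .in_length = 100,
 *       .out_channel = 8, .out_length = 0,   // written by the conv call
 *       .kernel_length = 3, .stride = 1, .dilation = 1,
 *       .padding_left = 1, .padding_right = 1, .group = 1,
 *   };
 *   // out_length = 1 + (in_length + padding_left + padding_right
 *   //                   - (kernel_length - 1) * dilation - 1) / stride
 *   // here: 1 + (100 + 1 + 1 - 2 - 1) / 1 = 100
 */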

/** @brief fc_int8_to_int8_weight_8bit_batch() - fully connected operation as below:
 *      output[k, i] = (sum(input[k, j] * weight[i, j]) + bias[i]) >> output_right_shift
 *      where the sum is done over index j.
 *      Saturation is included, i.e. output elements out of the range [-128, 127] are clamped to -128 or 127.
 * @param input: where the input, a matrix of size batch * in_length, is put
 *      input should be arranged as below; as an example, we set in_length = 28, batch = 6:
 *      in[0, 0], in[0, 1], in[0, 2], ..., in[0, 27], 0, 0, 0, 0,
 *      in[1, 0], in[1, 1], in[1, 2], ..., in[1, 27], 0, 0, 0, 0,
 *      ...
 *      in[5, 0], in[5, 1], in[5, 2], ..., in[5, 27], 0, 0, 0, 0
 *      zeros are padded to guarantee that the distance between in[i, j] and in[i + 1, j] is ceil(in_length / 8) * 8 bytes
 * @param output: where the output, a matrix of size batch * out_length, will be put
 *      output should be arranged as below; as an example, we set out_length = 28, batch = 6:
 *      out[0, 0], out[0, 1], out[0, 2], ..., out[0, 27], 0, 0, 0, 0,
 *      out[1, 0], out[1, 1], out[1, 2], ..., out[1, 27], 0, 0, 0, 0,
 *      ...
 *      out[5, 0], out[5, 1], out[5, 2], ..., out[5, 27], 0, 0, 0, 0
 *      zeros are padded to guarantee that the distance between out[i, j] and out[i + 1, j] is ceil(out_length / 8) * 8 bytes
 * @param weight: where the weight, a matrix of size out_length * in_length, is put
 *      weight should be arranged as below; as an example, we set in_length = 28, out_length = 6:
 *      w[0, 0], w[0, 1], w[0, 2], ..., w[0, 27], 0, 0, 0, 0,
 *      w[1, 0], w[1, 1], w[1, 2], ..., w[1, 27], 0, 0, 0, 0,
 *      ...
 *      w[5, 0], w[5, 1], w[5, 2], ..., w[5, 27], 0, 0, 0, 0
 *      zeros are padded to guarantee that the distance between w[i, j] and w[i + 1, j] in memory is ceil(in_length / 8) * 8 bytes
 * @param bias: where the bias is put
 * @param in_length: see above
 * @param out_length: see above
 * @param output_right_shift: the right shift amount shown in the formula above
 * @param batch: see above
 * @return 0 -- success
 * @return 1 -- in_length is 0
 * @return 2 -- out_length is 0
 * @return 3 -- batch is 0
 */
uint8_t fc_int8_to_int8_weight_8bit_batch(int8_t *input, int8_t *output,
        int8_t *weight, int32_t *bias, uint32_t in_length, uint32_t out_length,
        uint8_t output_right_shift, uint32_t batch);
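
/*
 * Illustrative usage sketch (buffer sizes follow the padding rules above;
 * the shift value and data are placeholders, not values from the library):
 * a single-batch call with in_length = 28 and out_length = 6.
 *
 *   static int8_t  in[32];       // ceil(28 / 8) * 8 = 32 bytes per input row
 *   static int8_t  out[8];       // ceil(6 / 8) * 8 = 8 bytes per output row
 *   static int8_t  w[6 * 32];    // 6 weight rows, each padded to 32 bytes
 *   static int32_t b[6];
 *   uint8_t rc = fc_int8_to_int8_weight_8bit_batch(in, out, w, b,
 *                                                  28, 6, 7, 1);
 *   // rc == 0 on success; out holds 6 saturated int8 results
 */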

/** @brief fc_int8_to_int8_weight_8bit_bias_shift_batch() - fully connected operation as below:
 *      output[k, i] = (sum(input[k, j] * weight[i, j]) + (bias[i] << bias_left_shift)) >> output_right_shift
 *      where the sum is done over index j.
 *      Saturation is included, i.e. output elements out of the range [-128, 127] are clamped to -128 or 127.
 * @param input: where the input, a matrix of size batch * in_length, is put
 *      input should be arranged as below; as an example, we set in_length = 28, batch = 6:
 *      in[0, 0], in[0, 1], in[0, 2], ..., in[0, 27], 0, 0, 0, 0,
 *      in[1, 0], in[1, 1], in[1, 2], ..., in[1, 27], 0, 0, 0, 0,
 *      ...
 *      in[5, 0], in[5, 1], in[5, 2], ..., in[5, 27], 0, 0, 0, 0
 *      zeros are padded to guarantee that the distance between in[i, j] and in[i + 1, j] is ceil(in_length / 8) * 8 bytes
 * @param output: where the output, a matrix of size batch * out_length, will be put
 *      output should be arranged as below; as an example, we set out_length = 28, batch = 6:
 *      out[0, 0], out[0, 1], out[0, 2], ..., out[0, 27], 0, 0, 0, 0,
 *      out[1, 0], out[1, 1], out[1, 2], ..., out[1, 27], 0, 0, 0, 0,
 *      ...
 *      out[5, 0], out[5, 1], out[5, 2], ..., out[5, 27], 0, 0, 0, 0
 *      zeros are padded to guarantee that the distance between out[i, j] and out[i + 1, j] is ceil(out_length / 8) * 8 bytes
 * @param weight: where the weight, a matrix of size out_length * in_length, is put
 *      weight should be arranged as below; as an example, we set in_length = 28, out_length = 6:
 *      w[0, 0], w[0, 1], w[0, 2], ..., w[0, 27], 0, 0, 0, 0,
 *      w[1, 0], w[1, 1], w[1, 2], ..., w[1, 27], 0, 0, 0, 0,
 *      ...
 *      w[5, 0], w[5, 1], w[5, 2], ..., w[5, 27], 0, 0, 0, 0
 *      zeros are padded to guarantee that the distance between w[i, j] and w[i + 1, j] in memory is ceil(in_length / 8) * 8 bytes
 * @param bias: where the bias is put
 * @param in_length: see above
 * @param out_length: see above
 * @param output_right_shift: the right shift amount shown in the formula above
 * @param bias_left_shift: the left shift amount shown in the formula above
 * @param batch: see above
 * @return 0 -- success
 * @return 1 -- in_length is 0
 * @return 2 -- out_length is 0
 * @return 3 -- batch is 0
 */
uint8_t fc_int8_to_int8_weight_8bit_bias_shift_batch(int8_t *input,
        int8_t *output, int8_t *weight, int8_t *bias,
        uint32_t in_length, uint32_t out_length, uint8_t output_right_shift,
        uint8_t bias_left_shift, uint32_t batch);
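
/*
 * Scalar sketch of the arithmetic described above, for one output element
 * (an illustrative plain-C reference only; the library kernel is optimized
 * and its internal ordering may differ). in_stride and out_stride are
 * assumed helper values: ceil(in_length / 8) * 8 and ceil(out_length / 8) * 8.
 *
 *   int32_t acc = (int32_t)bias[i] << bias_left_shift;
 *   for (uint32_t j = 0; j < in_length; j++)
 *       acc += (int32_t)input[k * in_stride + j] * weight[i * in_stride + j];
 *   acc >>= output_right_shift;
 *   if (acc > 127)  acc = 127;    // saturate to the int8 range
 *   if (acc < -128) acc = -128;
 *   output[k * out_stride + i] = (int8_t)acc;
 */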

/** @brief fc_int16_to_int16_weight_16bit_batch() - fully connected operation as below:
 *      output[k, i] = (sum(input[k, j] * weight[i, j]) + bias[i]) >> output_right_shift
 *      where the sum is done over index j.
 *      Saturation is included, i.e. output elements out of the range [-32768, 32767] are clamped to -32768 or 32767.
 * @param input: where the input, a matrix of size batch * in_length, is put
 *      input should be arranged as below; as an example, we set in_length = 14, batch = 6:
 *      in[0, 0], in[0, 1], in[0, 2], ..., in[0, 13], 0, 0,
 *      in[1, 0], in[1, 1], in[1, 2], ..., in[1, 13], 0, 0,
 *      ...
 *      in[5, 0], in[5, 1], in[5, 2], ..., in[5, 13], 0, 0
 *      zeros are padded to guarantee that the distance between in[i, j] and in[i + 1, j] is ceil(in_length / 4) * 8 bytes
 * @param output: where the output, a matrix of size batch * out_length, will be put
 *      output should be arranged as below; as an example, we set out_length = 14, batch = 6:
 *      out[0, 0], out[0, 1], out[0, 2], ..., out[0, 13], 0, 0,
 *      out[1, 0], out[1, 1], out[1, 2], ..., out[1, 13], 0, 0,
 *      ...
 *      out[5, 0], out[5, 1], out[5, 2], ..., out[5, 13], 0, 0
 *      zeros are padded to guarantee that the distance between out[i, j] and out[i + 1, j] is ceil(out_length / 4) * 8 bytes
 * @param weight: where the weight, a matrix of size out_length * in_length, is put
 *      weight should be arranged as below; as an example, we set in_length = 14, out_length = 6:
 *      w[0, 0], w[0, 1], w[0, 2], ..., w[0, 13], 0, 0,
 *      w[1, 0], w[1, 1], w[1, 2], ..., w[1, 13], 0, 0,
 *      ...
 *      w[5, 0], w[5, 1], w[5, 2], ..., w[5, 13], 0, 0
 *      zeros are padded to guarantee that the distance between w[i, j] and w[i + 1, j] is ceil(in_length / 4) * 8 bytes
 * @param bias: where the bias is put
 * @param in_length: see above
 * @param out_length: see above
 * @param output_right_shift: the right shift amount shown in the formula above
 * @param batch: see above
 * @return 0 -- success
 * @return 1 -- in_length is 0
 * @return 2 -- out_length is 0
 * @return 3 -- batch is 0
 */
uint8_t fc_int16_to_int16_weight_16bit_batch(int16_t *input, int16_t *output,
        int16_t *weight, int64_t *bias, uint32_t in_length, uint32_t out_length,
        uint8_t output_right_shift, uint32_t batch);
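
/*
 * Illustrative helpers (assumed names, not part of this header) for the row
 * strides implied by the padding rules above: int8 rows are padded to a
 * multiple of 8 bytes, int16 rows to a multiple of 4 elements (8 bytes).
 *
 *   #define ROW_STRIDE_INT8(len)  ((((len) + 7u) / 8u) * 8u)    // in bytes
 *   #define ROW_STRIDE_INT16(len) ((((len) + 3u) / 4u) * 8u)    // in bytes
 *
 *   // e.g. ROW_STRIDE_INT8(28) == 32, ROW_STRIDE_INT16(14) == 32
 */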

/** @brief fc_int16_to_int16_weight_16bit_bias_shift_batch() - fully connected operation as below:
 *      output[k, i] = (sum(input[k, j] * weight[i, j]) + (bias[i] << bias_left_shift)) >> output_right_shift
 *      where the sum is done over index j.
 *      Saturation is included, i.e. output elements out of the range [-32768, 32767] are clamped to -32768 or 32767.
 * @param input: where the input, a matrix of size batch * in_length, is put
 *      input should be arranged as below; as an example, we set in_length = 14, batch = 6:
 *      in[0, 0], in[0, 1], in[0, 2], ..., in[0, 13], 0, 0,
 *      in[1, 0], in[1, 1], in[1, 2], ..., in[1, 13], 0, 0,
 *      ...
 *      in[5, 0], in[5, 1], in[5, 2], ..., in[5, 13], 0, 0
 *      zeros are padded to guarantee that the distance between in[i, j] and in[i + 1, j] is ceil(in_length / 4) * 8 bytes
 * @param output: where the output, a matrix of size batch * out_length, will be put
 *      output should be arranged as below; as an example, we set out_length = 14, batch = 6:
 *      out[0, 0], out[0, 1], out[0, 2], ..., out[0, 13], 0, 0,
 *      out[1, 0], out[1, 1], out[1, 2], ..., out[1, 13], 0, 0,
 *      ...
 *      out[5, 0], out[5, 1], out[5, 2], ..., out[5, 13], 0, 0
 *      zeros are padded to guarantee that the distance between out[i, j] and out[i + 1, j] is ceil(out_length / 4) * 8 bytes
 * @param weight: where the weight, a matrix of size out_length * in_length, is put
 *      weight should be arranged as below; as an example, we set in_length = 14, out_length = 6:
 *      w[0, 0], w[0, 1], w[0, 2], ..., w[0, 13], 0, 0,
 *      w[1, 0], w[1, 1], w[1, 2], ..., w[1, 13], 0, 0,
 *      ...
 *      w[5, 0], w[5, 1], w[5, 2], ..., w[5, 13], 0, 0
 *      zeros are padded to guarantee that the distance between w[i, j] and w[i + 1, j] is ceil(in_length / 4) * 8 bytes
 * @param bias: where the bias is put
 * @param in_length: see above
 * @param out_length: see above
 * @param output_right_shift: the right shift amount shown in the formula above
 * @param bias_left_shift: the left shift amount shown in the formula above
 * @param batch: see above
 * @return 0 -- success
 * @return 1 -- in_length is 0
 * @return 2 -- out_length is 0
 * @return 3 -- batch is 0
 */
uint8_t fc_int16_to_int16_weight_16bit_bias_shift_batch(int16_t *input,
        int16_t *output, int16_t *weight, int16_t *bias,
        uint32_t in_length, uint32_t out_length, uint8_t output_right_shift,
        uint8_t bias_left_shift, uint32_t batch);

/** @brief fc_depth_int8_to_int8_weight_8bit_batch() - "depth" fully connected operation as below:
 *      output[k, i] = (input[k, i] * weight[i] + bias[i]) >> output_right_shift
 *      Saturation is included, i.e. output elements out of the range [-128, 127] are clamped to -128 or 127.
 * @param input: where the input, a matrix of size batch * length, is put
 *      input should be arranged as below; as an example, we set length = 14, batch = 6:
 *      in[0, 0], in[0, 1], in[0, 2], ..., in[0, 13], 0, 0,
 *      in[1, 0], in[1, 1], in[1, 2], ..., in[1, 13], 0, 0,
 *      ...
 *      in[5, 0], in[5, 1], in[5, 2], ..., in[5, 13], 0, 0
 *      zeros are padded to guarantee that the distance between in[i, j] and in[i + 1, j] is ceil(length / 8) * 8 bytes
 * @param output: where the output, a matrix of size batch * length, will be put
 *      output should be arranged as below; as an example, we set length = 14, batch = 6:
 *      out[0, 0], out[0, 1], out[0, 2], ..., out[0, 13], 0, 0,
 *      out[1, 0], out[1, 1], out[1, 2], ..., out[1, 13], 0, 0,
 *      ...
 *      out[5, 0], out[5, 1], out[5, 2], ..., out[5, 13], 0, 0
 *      zeros are padded to guarantee that the distance between out[i, j] and out[i + 1, j] is ceil(length / 8) * 8 bytes
 * @param weight: where the weight is put
 * @param bias: where the bias is put
 * @param length: see above
 * @param output_right_shift: the right shift amount shown in the formula above
 * @param batch: see above
 * @return 0 -- success
 * @return 1 -- length is 0
 * @return 2 -- batch is 0
 */
uint8_t fc_depth_int8_to_int8_weight_8bit_batch(int8_t *input, int8_t *output,
        int8_t *weight, int32_t *bias, uint32_t length, uint8_t output_right_shift,
        uint32_t batch);
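
/*
 * Scalar sketch of the "depth" operation above for one element (illustrative
 * only): unlike a full FC layer there is no reduction over the input; each
 * element is scaled by its own weight. stride is an assumed helper value,
 * ceil(length / 8) * 8.
 *
 *   int32_t acc = (int32_t)input[k * stride + i] * weight[i] + bias[i];
 *   acc >>= output_right_shift;
 *   if (acc > 127)  acc = 127;    // saturate to the int8 range
 *   if (acc < -128) acc = -128;
 *   output[k * stride + i] = (int8_t)acc;
 */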

/** @brief fc_depth_int8_to_int8_weight_8bit_bias_shift_batch() - "depth" fully connected operation as below:
 *      output[k, i] = (input[k, i] * weight[i] + (bias[i] << bias_left_shift)) >> output_right_shift
 *      Saturation is included, i.e. output elements out of the range [-128, 127] are clamped to -128 or 127.
 * @param input: where the input, a matrix of size batch * length, is put
 *      input should be arranged as below; as an example, we set length = 14, batch = 6:
 *      in[0, 0], in[0, 1], in[0, 2], ..., in[0, 13], 0, 0,
 *      in[1, 0], in[1, 1], in[1, 2], ..., in[1, 13], 0, 0,
 *      ...
 *      in[5, 0], in[5, 1], in[5, 2], ..., in[5, 13], 0, 0
 *      zeros are padded to guarantee that the distance between in[i, j] and in[i + 1, j] is ceil(length / 8) * 8 bytes
 * @param output: where the output, a matrix of size batch * length, will be put
 *      output should be arranged as below; as an example, we set length = 14, batch = 6:
 *      out[0, 0], out[0, 1], out[0, 2], ..., out[0, 13], 0, 0,
 *      out[1, 0], out[1, 1], out[1, 2], ..., out[1, 13], 0, 0,
 *      ...
 *      out[5, 0], out[5, 1], out[5, 2], ..., out[5, 13], 0, 0
 *      zeros are padded to guarantee that the distance between out[i, j] and out[i + 1, j] is ceil(length / 8) * 8 bytes
 * @param weight: where the weight is put
 * @param bias: where the bias is put
 * @param length: see above
 * @param output_right_shift: the right shift amount shown in the formula above
 * @param bias_left_shift: the left shift amount shown in the formula above
 * @param batch: see above
 * @return 0 -- success
 * @return 1 -- length is 0
 * @return 2 -- batch is 0
 */
uint8_t fc_depth_int8_to_int8_weight_8bit_bias_shift_batch(int8_t *input,
        int8_t *output, int8_t *weight, int8_t *bias, uint32_t length,
        uint8_t output_right_shift, uint8_t bias_left_shift, uint32_t batch);

/** @brief fc_depth_int16_to_int16_weight_16bit_batch() - "depth" fully connected operation as below:
 *      output[k, i] = (input[k, i] * weight[i] + bias[i]) >> output_right_shift
 *      Saturation is included, i.e. output elements out of the range [-32768, 32767] are clamped to -32768 or 32767.
 * @param input: where the input, a matrix of size batch * length, is put
 *      input should be arranged as below; as an example, we set length = 14, batch = 6:
 *      in[0, 0], in[0, 1], in[0, 2], ..., in[0, 13], 0, 0,
 *      in[1, 0], in[1, 1], in[1, 2], ..., in[1, 13], 0, 0,
 *      ...
 *      in[5, 0], in[5, 1], in[5, 2], ..., in[5, 13], 0, 0
 *      zeros are padded to guarantee that the distance between in[i, j] and in[i + 1, j] is ceil(length / 4) * 8 bytes
 * @param output: where the output, a matrix of size batch * length, will be put
 *      output should be arranged as below; as an example, we set length = 14, batch = 6:
 *      out[0, 0], out[0, 1], out[0, 2], ..., out[0, 13], 0, 0,
 *      out[1, 0], out[1, 1], out[1, 2], ..., out[1, 13], 0, 0,
 *      ...
 *      out[5, 0], out[5, 1], out[5, 2], ..., out[5, 13], 0, 0
 *      zeros are padded to guarantee that the distance between out[i, j] and out[i + 1, j] is ceil(length / 4) * 8 bytes
 * @param weight: where the weight is put
 * @param bias: where the bias is put
 * @param length: see above
 * @param output_right_shift: the right shift amount shown in the formula above
 * @param batch: see above
 * @return 0 -- success
 * @return 1 -- length is 0
 * @return 2 -- batch is 0
 */
uint8_t fc_depth_int16_to_int16_weight_16bit_batch(int16_t *input,
        int16_t *output, int16_t *weight, int64_t *bias, uint32_t length,
        uint8_t output_right_shift, uint32_t batch);

/** @brief fc_depth_int16_to_int16_weight_16bit_bias_shift_batch() - "depth" fully connected operation as below:
 *      output[k, i] = (input[k, i] * weight[i] + (bias[i] << bias_left_shift)) >> output_right_shift
 *      Saturation is included, i.e. output elements out of the range [-32768, 32767] are clamped to -32768 or 32767.
 * @param input: where the input, a matrix of size batch * length, is put
 *      input should be arranged as below; as an example, we set length = 14, batch = 6:
 *      in[0, 0], in[0, 1], in[0, 2], ..., in[0, 13], 0, 0,
 *      in[1, 0], in[1, 1], in[1, 2], ..., in[1, 13], 0, 0,
 *      ...
 *      in[5, 0], in[5, 1], in[5, 2], ..., in[5, 13], 0, 0
 *      zeros are padded to guarantee that the distance between in[i, j] and in[i + 1, j] is ceil(length / 4) * 8 bytes
 * @param output: where the output, a matrix of size batch * length, will be put
 *      output should be arranged as below; as an example, we set length = 14, batch = 6:
 *      out[0, 0], out[0, 1], out[0, 2], ..., out[0, 13], 0, 0,
 *      out[1, 0], out[1, 1], out[1, 2], ..., out[1, 13], 0, 0,
 *      ...
 *      out[5, 0], out[5, 1], out[5, 2], ..., out[5, 13], 0, 0
 *      zeros are padded to guarantee that the distance between out[i, j] and out[i + 1, j] is ceil(length / 4) * 8 bytes
 * @param weight: where the weight is put
 * @param bias: where the bias is put
 * @param length: see above
 * @param output_right_shift: the right shift amount shown in the formula above
 * @param bias_left_shift: the left shift amount shown in the formula above
 * @param batch: see above
 * @return 0 -- success
 * @return 1 -- length is 0
 * @return 2 -- batch is 0
 */
uint8_t fc_depth_int16_to_int16_weight_16bit_bias_shift_batch(int16_t *input,
        int16_t *output, int16_t *weight, int16_t *bias, uint32_t length,
        uint8_t output_right_shift, uint8_t bias_left_shift, uint32_t batch);

/** @brief conv1d_int8_to_int8_weight_8bit() - 1-D convolution operation as below:
 *      output[c, l] = (sum(input[i, l * stride + k * dilation - padding_left] * weight[c, i, k]) + bias[c]) >> output_right_shift
 *      where the sum is done over indices i and k.
 *      Saturation is included, i.e. output elements out of the range [-128, 127] are clamped to -128 or 127.
 * @param in: where the input, a 2-D tensor of size [in_channel, in_length], is put
 *      input is arranged as below; as an example, we set in_channel = 28, in_length = 10:
 *      input[0, 0], input[1, 0], input[2, 0], ..., input[27, 0], 0, 0, 0, 0,
 *      input[0, 1], input[1, 1], ..., input[27, 1], 0, 0, 0, 0, ...,
 *      input[0, 9], ..., input[27, 9], 0, 0, 0, 0
 *      zeros are padded so that the distance between input[i, j] and input[i, j + 1] is ceil(in_channel / 16.0) * 16 bytes
 * @param out: where the output, a 2-D tensor of size [out_channel, out_length], will be put; output is arranged in the same form as the input,
 *      where out_length = 1 + floor((in_length + padding_left + padding_right - (kernel_length - 1) * dilation - 1) / stride)
 * @param weight: where the weight is put.
 *      If in_channel, out_channel, and group are all equal, weight is a 3-D tensor of size [out_channel, 1, kernel_length].
 *      It should be arranged as below; as an example, we set kernel_length = 4, in_channel = 14, out_channel = 14:
 *      w[0, 0, 0], w[1, 0, 0], ..., w[7, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[7, 0, 1], ..., w[0, 0, 3], ..., w[7, 0, 3],
 *      w[8, 0, 0], ..., w[13, 0, 0], 0, 0, w[8, 0, 1], ..., w[13, 0, 1], 0, 0, ..., w[8, 0, 3], ..., w[13, 0, 3], 0, 0
 *      zeros are padded so that the distance between w[i, 0, k] and w[i, 0, k + 1] is always 8 bytes.
 *      Otherwise, weight is a 3-D tensor of size [out_channel, in_channel / group, kernel_length];
 *      it can be divided into groups, and within each group it is a 3-D tensor of size [out_channel / group, in_channel / group, kernel_length].
 *      weight should be arranged as below; as an example, we set kernel_length = 4, in_channel = 4, out_channel = 28, group = 2
 *      (note in_channel / group = 2 and out_channel / group = 14, so w[0:14, :, :] is for the 1st group and w[14:28, :, :] is for the 2nd group):
 *      w[0, 0, 0], w[1, 0, 0], ..., w[7, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[7, 0, 1], ..., w[0, 0, 3], ..., w[7, 0, 3],
 *      w[0, 1, 0], w[1, 1, 0], ..., w[7, 1, 0], w[0, 1, 1], w[1, 1, 1], ..., w[7, 1, 1], ..., w[0, 1, 3], ..., w[7, 1, 3],
 *      w[8, 0, 0], ..., w[13, 0, 0], 0, 0, w[8, 0, 1], ..., w[13, 0, 1], 0, 0, ..., w[8, 0, 3], ..., w[13, 0, 3], 0, 0,
 *      w[8, 1, 0], ..., w[13, 1, 0], 0, 0, w[8, 1, 1], ..., w[13, 1, 1], 0, 0, ..., w[8, 1, 3], ..., w[13, 1, 3], 0, 0,
 *      w[14, 0, 0], w[15, 0, 0], ..., w[21, 0, 0], w[14, 0, 1], w[15, 0, 1], ..., w[21, 0, 1], ..., w[14, 0, 3], ..., w[21, 0, 3],
 *      w[14, 1, 0], w[15, 1, 0], ..., w[21, 1, 0], w[14, 1, 1], w[15, 1, 1], ..., w[21, 1, 1], ..., w[14, 1, 3], ..., w[21, 1, 3],
 *      w[22, 0, 0], ..., w[27, 0, 0], 0, 0, w[22, 0, 1], ..., w[27, 0, 1], 0, 0, ..., w[22, 0, 3], ..., w[27, 0, 3], 0, 0,
 *      w[22, 1, 0], ..., w[27, 1, 0], 0, 0, w[22, 1, 1], ..., w[27, 1, 1], 0, 0, ..., w[22, 1, 3], ..., w[27, 1, 3], 0, 0
 *      zeros are padded to guarantee that the distance between w[i, j, k] and w[i, j, k + 1] in memory is 8 bytes
 * @param bias: where the bias is put; if bias is not needed, this pointer should be set to NULL
 * @param bean: pointer to the config bean described at the top of this file
 * @param output_right_shift: the right shift amount shown in the formula above
 * @return 0 -- success
 * @return 1 -- in_channel is 0
 * @return 2 -- out_channel is 0
 * @return 3 -- in_length is 0
 * @return 4 -- kernel_length is 0
 * @return 5 -- stride is 0
 * @return 6 -- dilation is 0
 * @return 7 -- group is 0
 * @return 8 -- in_channel % group is not 0
 * @return 9 -- out_channel % group is not 0
 * @return 10 -- the computed out_length is not a positive number
 */
uint8_t conv1d_int8_to_int8_weight_8bit(int8_t *in, int8_t *out,
        int8_t *weight, int32_t *bias, struct conv1d_config_bean *bean,
        uint8_t output_right_shift);
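
/*
 * Illustrative usage sketch (all sizes, data, and the shift value are
 * placeholders; buffer strides follow the padding rules above, assuming the
 * column stride ceil(in_channel / 16.0) * 16 bytes): an ungrouped 1-D conv
 * with in_channel = 8, in_length = 16, kernel_length = 3, padding 1/1.
 *
 *   static int8_t  in[16 * 16];   // 16 columns, each padded to 16 bytes
 *   static int8_t  out[16 * 16];
 *   static int8_t  w[8 * 8 * 3];  // laid out per the weight rules above
 *   static int32_t b[8];
 *   struct conv1d_config_bean bean = {
 *       .in_channel = 8, .in_length = 16, .out_channel = 8,
 *       .kernel_length = 3, .stride = 1, .dilation = 1,
 *       .padding_left = 1, .padding_right = 1, .group = 1,
 *   };
 *   uint8_t rc = conv1d_int8_to_int8_weight_8bit(in, out, w, b, &bean, 6);
 *   // on success rc == 0 and bean.out_length == 1 + (16 + 1 + 1 - 2 - 1) = 16
 */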

/** @brief conv1d_int8_to_int8_weight_8bit_bias_shift() - 1-D convolution operation as below:
 *      output[c, l] = (sum(input[i, l * stride + k * dilation - padding_left] * weight[c, i, k]) + (bias[c] << bias_left_shift)) >> output_right_shift
 *      where the sum is done over indices i and k.
 *      Saturation is included, i.e. output elements out of the range [-128, 127] are clamped to -128 or 127.
 * @param in: where the input, a 2-D tensor of size [in_channel, in_length], is put
 *      input is arranged as below; as an example, we set in_channel = 28, in_length = 10:
 *      input[0, 0], input[1, 0], input[2, 0], ..., input[27, 0], 0, 0, 0, 0,
 *      input[0, 1], input[1, 1], ..., input[27, 1], 0, 0, 0, 0, ...,
 *      input[0, 9], ..., input[27, 9], 0, 0, 0, 0
 *      zeros are padded so that the distance between input[i, j] and input[i, j + 1] is ceil(in_channel / 16.0) * 16 bytes
 * @param out: where the output, a 2-D tensor of size [out_channel, out_length], will be put; output is arranged in the same form as the input,
 *      where out_length = 1 + floor((in_length + padding_left + padding_right - (kernel_length - 1) * dilation - 1) / stride)
 * @param weight: where the weight is put.
 *      If in_channel, out_channel, and group are all equal, weight is a 3-D tensor of size [out_channel, 1, kernel_length].
 *      It should be arranged as below; as an example, we set kernel_length = 4, in_channel = 14, out_channel = 14:
 *      w[0, 0, 0], w[1, 0, 0], ..., w[7, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[7, 0, 1], ..., w[0, 0, 3], ..., w[7, 0, 3],
 *      w[8, 0, 0], ..., w[13, 0, 0], 0, 0, w[8, 0, 1], ..., w[13, 0, 1], 0, 0, ..., w[8, 0, 3], ..., w[13, 0, 3], 0, 0
 *      zeros are padded so that the distance between w[i, 0, k] and w[i, 0, k + 1] is always 8 bytes.
 *      Otherwise, weight is a 3-D tensor of size [out_channel, in_channel / group, kernel_length];
 *      it can be divided into groups, and within each group it is a 3-D tensor of size [out_channel / group, in_channel / group, kernel_length].
 *      weight should be arranged as below; as an example, we set kernel_length = 4, in_channel = 4, out_channel = 28, group = 2
 *      (note in_channel / group = 2 and out_channel / group = 14, so w[0:14, :, :] is for the 1st group and w[14:28, :, :] is for the 2nd group):
 *      w[0, 0, 0], w[1, 0, 0], ..., w[7, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[7, 0, 1], ..., w[0, 0, 3], ..., w[7, 0, 3],
 *      w[0, 1, 0], w[1, 1, 0], ..., w[7, 1, 0], w[0, 1, 1], w[1, 1, 1], ..., w[7, 1, 1], ..., w[0, 1, 3], ..., w[7, 1, 3],
 *      w[8, 0, 0], ..., w[13, 0, 0], 0, 0, w[8, 0, 1], ..., w[13, 0, 1], 0, 0, ..., w[8, 0, 3], ..., w[13, 0, 3], 0, 0,
 *      w[8, 1, 0], ..., w[13, 1, 0], 0, 0, w[8, 1, 1], ..., w[13, 1, 1], 0, 0, ..., w[8, 1, 3], ..., w[13, 1, 3], 0, 0,
 *      w[14, 0, 0], w[15, 0, 0], ..., w[21, 0, 0], w[14, 0, 1], w[15, 0, 1], ..., w[21, 0, 1], ..., w[14, 0, 3], ..., w[21, 0, 3],
 *      w[14, 1, 0], w[15, 1, 0], ..., w[21, 1, 0], w[14, 1, 1], w[15, 1, 1], ..., w[21, 1, 1], ..., w[14, 1, 3], ..., w[21, 1, 3],
 *      w[22, 0, 0], ..., w[27, 0, 0], 0, 0, w[22, 0, 1], ..., w[27, 0, 1], 0, 0, ..., w[22, 0, 3], ..., w[27, 0, 3], 0, 0,
 *      w[22, 1, 0], ..., w[27, 1, 0], 0, 0, w[22, 1, 1], ..., w[27, 1, 1], 0, 0, ..., w[22, 1, 3], ..., w[27, 1, 3], 0, 0
 *      zeros are padded to guarantee that the distance between w[i, j, k] and w[i, j, k + 1] in memory is 8 bytes
 * @param bias: where the bias is put; if bias is not needed, this pointer should be set to NULL
 * @param bean: pointer to the config bean described at the top of this file
 * @param output_right_shift: the right shift amount shown in the formula above
 * @param bias_left_shift: the left shift amount shown in the formula above
 * @return 0 -- success
 * @return 1 -- in_channel is 0
 * @return 2 -- out_channel is 0
 * @return 3 -- in_length is 0
 * @return 4 -- kernel_length is 0
 * @return 5 -- stride is 0
 * @return 6 -- dilation is 0
 * @return 7 -- group is 0
 * @return 8 -- in_channel % group is not 0
 * @return 9 -- out_channel % group is not 0
 * @return 10 -- the computed out_length is not a positive number
 */
uint8_t conv1d_int8_to_int8_weight_8bit_bias_shift(int8_t *in, int8_t *out,
        int8_t *weight, int8_t *bias, struct conv1d_config_bean *bean,
        uint8_t output_right_shift, uint8_t bias_left_shift);

/** @brief conv1d_int16_to_int16_weight_16bit() - 1-D convolution operation as below:
 *      output[c, l] = (sum(input[i, l * stride + k * dilation - padding_left] * weight[c, i, k]) + bias[c]) >> output_right_shift
 *      where the sum is done over indices i and k.
 *      Saturation is included, i.e. output elements out of the range [-32768, 32767] are clamped to -32768 or 32767.
 * @param in: where the input, a 2-D tensor of size [in_channel, in_length], is put
 *      input is arranged as below; as an example, we set in_channel = 14, in_length = 10:
 *      input[0, 0], input[1, 0], input[2, 0], ..., input[13, 0], 0, 0,
 *      input[0, 1], input[1, 1], ..., input[13, 1], 0, 0, ...,
 *      input[0, 9], ..., input[13, 9], 0, 0
 *      zeros are padded so that the distance between input[i, j] and input[i, j + 1] is ceil(in_channel / 8.0) * 16 bytes
 * @param out: where the output, a 2-D tensor of size [out_channel, out_length], will be put; output is arranged in the same form as the input,
 *      where out_length = 1 + floor((in_length + padding_left + padding_right - (kernel_length - 1) * dilation - 1) / stride)
 * @param weight: where the weight is put.
 *      If in_channel, out_channel, and group are all equal, weight is a 3-D tensor of size [out_channel, 1, kernel_length].
 *      It should be arranged as below; as an example, we set kernel_length = 4, in_channel = 7, out_channel = 7:
 *      w[0, 0, 0], w[1, 0, 0], w[2, 0, 0], w[3, 0, 0], w[0, 0, 1], w[1, 0, 1], w[2, 0, 1], w[3, 0, 1], ..., w[0, 0, 3], ..., w[3, 0, 3],
 *      w[4, 0, 0], w[5, 0, 0], w[6, 0, 0], 0, w[4, 0, 1], w[5, 0, 1], w[6, 0, 1], 0, ..., w[4, 0, 3], w[5, 0, 3], w[6, 0, 3], 0
 *      zeros are padded so that the distance between w[i, 0, k] and w[i, 0, k + 1] is always 8 bytes.
 *      Otherwise, weight is a 3-D tensor of size [out_channel, in_channel / group, kernel_length];
 *      it can be divided into groups, and within each group it is a 3-D tensor of size [out_channel / group, in_channel / group, kernel_length].
 *      weight should be arranged as below; as an example, we set kernel_length = 4, in_channel = 4, out_channel = 14, group = 2
 *      (note in_channel / group = 2 and out_channel / group = 7, so w[0:7, :, :] is for the 1st group and w[7:14, :, :] is for the 2nd group):
 *      w[0, 0, 0], w[1, 0, 0], w[2, 0, 0], w[3, 0, 0], w[0, 0, 1], w[1, 0, 1], w[2, 0, 1], w[3, 0, 1], ..., w[0, 0, 3], ..., w[3, 0, 3],
 *      w[0, 1, 0], w[1, 1, 0], w[2, 1, 0], w[3, 1, 0], w[0, 1, 1], w[1, 1, 1], w[2, 1, 1], w[3, 1, 1], ..., w[0, 1, 3], ..., w[3, 1, 3],
 *      w[4, 0, 0], w[5, 0, 0], w[6, 0, 0], 0, w[4, 0, 1], w[5, 0, 1], w[6, 0, 1], 0, ..., w[4, 0, 3], w[5, 0, 3], w[6, 0, 3], 0,
 *      w[4, 1, 0], w[5, 1, 0], w[6, 1, 0], 0, w[4, 1, 1], w[5, 1, 1], w[6, 1, 1], 0, ..., w[4, 1, 3], w[5, 1, 3], w[6, 1, 3], 0,
 *      w[7, 0, 0], w[8, 0, 0], ..., w[10, 0, 0], w[7, 0, 1], w[8, 0, 1], ..., w[10, 0, 1], ..., w[7, 0, 3], ..., w[10, 0, 3],
 *      w[7, 1, 0], w[8, 1, 0], ..., w[10, 1, 0], w[7, 1, 1], w[8, 1, 1], ..., w[10, 1, 1], ..., w[7, 1, 3], ..., w[10, 1, 3],
 *      w[11, 0, 0], ..., w[13, 0, 0], 0, w[11, 0, 1], ..., w[13, 0, 1], 0, ..., w[11, 0, 3], ..., w[13, 0, 3], 0,
 *      w[11, 1, 0], ..., w[13, 1, 0], 0, w[11, 1, 1], ..., w[13, 1, 1], 0, ..., w[11, 1, 3], ..., w[13, 1, 3], 0
 *      zeros are padded to guarantee that the distance between w[i, j, k] and w[i, j, k + 1] in memory is 8 bytes
 * @param bias: where the bias is put; if bias is not needed, this pointer should be set to NULL
 * @param bean: pointer to the config bean described at the top of this file
 * @param output_right_shift: the right shift amount shown in the formula above
 * @return 0 -- success
 * @return 1 -- in_channel is 0
 * @return 2 -- out_channel is 0
 * @return 3 -- in_length is 0
 * @return 4 -- kernel_length is 0
 * @return 5 -- stride is 0
 * @return 6 -- dilation is 0
 * @return 7 -- group is 0
 * @return 8 -- in_channel % group is not 0
 * @return 9 -- out_channel % group is not 0
 * @return 10 -- the computed out_length is not a positive number
 */
uint8_t conv1d_int16_to_int16_weight_16bit(int16_t *in, int16_t *out,
        int16_t *weight, int64_t *bias, struct conv1d_config_bean *bean,
        uint8_t output_right_shift);
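
/*
 * Worked example of the out_length formula above (arithmetic only; the
 * parameter values are chosen purely for illustration): with in_length = 10,
 * padding_left = padding_right = 2, kernel_length = 4, dilation = 2,
 * stride = 2:
 *
 *   out_length = 1 + floor((10 + 2 + 2 - (4 - 1) * 2 - 1) / 2)
 *              = 1 + floor(7 / 2)
 *              = 4
 */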

/** @brief conv1d_int16_to_int16_weight_16bit_bias_shift() - 1-D convolution operation as below:
 *      output[c, l] = (sum(input[i, l * stride + k * dilation - padding_left] * weight[c, i, k]) + (bias[c] << bias_left_shift)) >> output_right_shift
 *      where the sum is done over indices i and k.
 *      Saturation is included, i.e. output elements out of the range [-32768, 32767] are clamped to -32768 or 32767.
 * @param in: where the input, a 2-D tensor of size [in_channel, in_length], is put
 *      input is arranged as below; as an example, we set in_channel = 14, in_length = 10:
 *      input[0, 0], input[1, 0], input[2, 0], ..., input[13, 0], 0, 0,
 *      input[0, 1], input[1, 1], ..., input[13, 1], 0, 0, ...,
 *      input[0, 9], ..., input[13, 9], 0, 0
 *      zeros are padded so that the distance between input[i, j] and input[i, j + 1] is ceil(in_channel / 8.0) * 16 bytes
 * @param out: where the output, a 2-D tensor of size [out_channel, out_length], will be put; output is arranged in the same form as the input,
 *      where out_length = 1 + floor((in_length + padding_left + padding_right - (kernel_length - 1) * dilation - 1) / stride)
 * @param weight: where the weight is put.
 *      If in_channel, out_channel, and group are all equal, weight is a 3-D tensor of size [out_channel, 1, kernel_length].
 *      It should be arranged as below; as an example, we set kernel_length = 4, in_channel = 7, out_channel = 7:
 *      w[0, 0, 0], w[1, 0, 0], w[2, 0, 0], w[3, 0, 0], w[0, 0, 1], w[1, 0, 1], w[2, 0, 1], w[3, 0, 1], ..., w[0, 0, 3], ..., w[3, 0, 3],
 *      w[4, 0, 0], w[5, 0, 0], w[6, 0, 0], 0, w[4, 0, 1], w[5, 0, 1], w[6, 0, 1], 0, ..., w[4, 0, 3], w[5, 0, 3], w[6, 0, 3], 0
 *      zeros are padded so that the distance between w[i, 0, k] and w[i, 0, k + 1] is always 8 bytes.
 *      Otherwise, weight is a 3-D tensor of size [out_channel, in_channel / group, kernel_length];
 *      it can be divided into groups, and within each group it is a 3-D tensor of size [out_channel / group, in_channel / group, kernel_length].
 *      weight should be arranged as below; as an example, we set kernel_length = 4, in_channel = 4, out_channel = 14, group = 2
 *      (note in_channel / group = 2 and out_channel / group = 7, so w[0:7, :, :] is for the 1st group and w[7:14, :, :] is for the 2nd group):
 *      w[0, 0, 0], w[1, 0, 0], w[2, 0, 0], w[3, 0, 0], w[0, 0, 1], w[1, 0, 1], w[2, 0, 1], w[3, 0, 1], ..., w[0, 0, 3], ..., w[3, 0, 3],
 *      w[0, 1, 0], w[1, 1, 0], w[2, 1, 0], w[3, 1, 0], w[0, 1, 1], w[1, 1, 1], w[2, 1, 1], w[3, 1, 1], ..., w[0, 1, 3], ..., w[3, 1, 3],
 *      w[4, 0, 0], w[5, 0, 0], w[6, 0, 0], 0, w[4, 0, 1], w[5, 0, 1], w[6, 0, 1], 0, ..., w[4, 0, 3], w[5, 0, 3], w[6, 0, 3], 0,
 *      w[4, 1, 0], w[5, 1, 0], w[6, 1, 0], 0, w[4, 1, 1], w[5, 1, 1], w[6, 1, 1], 0, ..., w[4, 1, 3], w[5, 1, 3], w[6, 1, 3], 0,
 *      w[7, 0, 0], w[8, 0, 0], ..., w[10, 0, 0], w[7, 0, 1], w[8, 0, 1], ..., w[10, 0, 1], ..., w[7, 0, 3], ..., w[10, 0, 3],
 *      w[7, 1, 0], w[8, 1, 0], ..., w[10, 1, 0], w[7, 1, 1], w[8, 1, 1], ..., w[10, 1, 1], ..., w[7, 1, 3], ..., w[10, 1, 3],
 *      w[11, 0, 0], ..., w[13, 0, 0], 0, w[11, 0, 1], ..., w[13, 0, 1], 0, ..., w[11, 0, 3], ..., w[13, 0, 3], 0,
 *      w[11, 1, 0], ..., w[13, 1, 0], 0, w[11, 1, 1], ..., w[13, 1, 1], 0, ..., w[11, 1, 3], ..., w[13, 1, 3], 0
 *      zeros are padded to guarantee that the distance between w[i, j, k] and w[i, j, k + 1] in memory is 8 bytes
 * @param bias: where the bias is put; if bias is not needed, this pointer should be set to NULL
 * @param bean: pointer to the config bean described at the top of this file
 * @param output_right_shift: the right shift amount shown in the formula above
 * @param bias_left_shift: the left shift amount shown in the formula above
 * @return 0 -- success
 * @return 1 -- in_channel is 0
 * @return 2 -- out_channel is 0
 * @return 3 -- in_length is 0
 * @return 4 -- kernel_length is 0
 * @return 5 -- stride is 0
 * @return 6 -- dilation is 0
 * @return 7 -- group is 0
 * @return 8 -- in_channel % group is not 0
 * @return 9 -- out_channel % group is not 0
 * @return 10 -- the computed out_length is not a positive number
 */
uint8_t conv1d_int16_to_int16_weight_16bit_bias_shift(int16_t *in, int16_t *out,
        int16_t *weight, int16_t *bias, struct conv1d_config_bean *bean,
        uint8_t output_right_shift, uint8_t bias_left_shift);

/** @brief sigmoid_int8() - sigmoid function
 * @param input_: pointer to the input vector; input should be of form s(8, 4), i.e. a signed number with 4 fraction bits
 * @param output: pointer to the output vector; output will be of form s(8, 7), i.e. a signed number with 7 fraction bits
 * @param length: length of the vectors
 */
void sigmoid_int8(int8_t *input_, int8_t *output, uint32_t length);
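
/*
 * Illustrative sketch of the fixed-point formats (placeholder data): in
 * s(8, 4) a real value x is stored as x * 16, and in s(8, 7) a stored value
 * y represents y / 128.
 *
 *   int8_t in[4] = { 16, -16, 0, 32 };  // 1.0, -1.0, 0.0, 2.0 in s(8, 4)
 *   int8_t out[4];
 *   sigmoid_int8(in, out, 4);
 *   // out[2] should be near 64, since sigmoid(0) = 0.5 = 64 / 128 in s(8, 7)
 */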

/** @brief sigmoid_int16() - sigmoid function
 * @param input_: pointer to the input vector; input should be of form s(16, 12), i.e. a signed number with 12 fraction bits
 * @param output: pointer to the output vector; output will be of form s(16, 15), i.e. a signed number with 15 fraction bits
 * @param length: length of the vectors
 */
void sigmoid_int16(int16_t *input_, int16_t *output, uint32_t length);

/** @brief tanh_int8() - tanh function
 * @param input_: pointer to the input vector; input should be of form s(8, 5), i.e. a signed number with 5 fraction bits
 * @param output: pointer to the output vector; output will be of form s(8, 7), i.e. a signed number with 7 fraction bits
 * @param length: length of the vectors
 */
void tanh_int8(int8_t *input_, int8_t *output, uint32_t length);

/** @brief tanh_int16() - tanh function
 * @param input_: pointer to the input vector; input should be of form s(16, 13), i.e. a signed number with 13 fraction bits
 * @param output: pointer to the output vector; output will be of form s(16, 15), i.e. a signed number with 15 fraction bits
 * @param length: length of the vectors
 */
void tanh_int16(int16_t *input_, int16_t *output, uint32_t length);

/** @brief softmax_int8() - softmax function
 * @param input_: pointer to the input vector; input should be of form s(8, 4), i.e. a signed number with 4 fraction bits
 * @param output: pointer to the output vector; output will be of form s(8, 7), i.e. a signed number with 7 fraction bits
 * @param length: length of the vectors
 */
void softmax_int8(int8_t *input_, int8_t *output, uint16_t length);

/** @brief softmax_int16() - softmax function
 * @param input_: pointer to the input vector; input should be of form s(16, 12), i.e. a signed number with 12 fraction bits
 * @param output: pointer to the output vector; output will be of form s(16, 15), i.e. a signed number with 15 fraction bits
 * @param length: length of the vectors
 */
void softmax_int16(int16_t *input_, int16_t *output, uint16_t length);

/** @brief logsoftmax_int8() - logsoftmax function, formula as below:
 *      output[b, i] = input_[b, i] - ln(sum(exp(input_[b, j]))), where the sum is done over j.
 *      The input and output vectors should be of form s(8, 5), i.e. signed numbers with 5 fraction bits.
 * @param input_: where the input, a matrix of size batch * length, is put
 *      input should be arranged as below; as an example, we set length = 14, batch = 8:
 *      in[0, 0], in[0, 1], in[0, 2], ..., in[0, 13], 0, 0,
 *      in[1, 0], in[1, 1], in[1, 2], ..., in[1, 13], 0, 0,
 *      ...
 *      in[7, 0], in[7, 1], in[7, 2], ..., in[7, 13], 0, 0
 *      zeros are padded to guarantee that the distance between in[i, j] and in[i + 1, j] is ceil(length / 8) * 8 bytes
 * @param output: where the output, a matrix of size batch * length, will be put
 *      output should be arranged as below; as an example, we set length = 14, batch = 8:
 *      out[0, 0], out[0, 1], out[0, 2], ..., out[0, 13], 0, 0,
 *      out[1, 0], out[1, 1], out[1, 2], ..., out[1, 13], 0, 0,
 *      ...
 *      out[7, 0], out[7, 1], out[7, 2], ..., out[7, 13], 0, 0
 *      zeros are padded to guarantee that the distance between out[i, j] and out[i + 1, j] is ceil(length / 8) * 8 bytes
 * @param max_indice: stores the index of the maximum element of each batch of the output
 * @param max: stores the maximum value of each batch of the output
 * @param length: length of each input vector
 * @param batch: number of input vectors
 * @return 0 -- success
 * @return 1 -- length < 2
 * @return 2 -- batch is 0
 * @return 3 -- batch is not a multiple of 8
 */
uint8_t logsoftmax_int8(int8_t *input_, int8_t *output, uint16_t *max_indice, int8_t *max, uint16_t length, uint32_t batch);
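
/*
 * Illustrative usage sketch (placeholder data): batch must be a multiple of
 * 8 here, and each row is padded to ceil(length / 8) * 8 bytes.
 *
 *   static int8_t   in[8 * 16];   // 8 rows of length 14, padded to 16 bytes
 *   static int8_t   out[8 * 16];
 *   static uint16_t idx[8];       // index of the maximum, per row
 *   static int8_t   mx[8];        // maximum value, per row
 *   uint8_t rc = logsoftmax_int8(in, out, idx, mx, 14, 8);
 *   // rc == 0 on success
 */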

/** @brief logsoftmax_int16() - logsoftmax function, formula as below:
 *      output[b, i] = input_[b, i] - ln(sum(exp(input_[b, j]))), where the sum is done over j.
 *      The input and output vectors should be of form s(16, 12), i.e. signed numbers with 12 fraction bits.
 * @param input_: where the input, a matrix of size batch * length, is put
 *      input should be arranged as below; as an example, we set length = 14, batch = 8:
 *      in[0, 0], in[0, 1], in[0, 2], ..., in[0, 13], 0, 0,
 *      in[1, 0], in[1, 1], in[1, 2], ..., in[1, 13], 0, 0,
 *      ...
 *      in[7, 0], in[7, 1], in[7, 2], ..., in[7, 13], 0, 0
 *      zeros are padded to guarantee that the distance between in[i, j] and in[i + 1, j] is ceil(length / 4) * 8 bytes
 * @param output: where the output, a matrix of size batch * length, will be put
 *      output should be arranged as below; as an example, we set length = 14, batch = 8:
 *      out[0, 0], out[0, 1], out[0, 2], ..., out[0, 13], 0, 0,
 *      out[1, 0], out[1, 1], out[1, 2], ..., out[1, 13], 0, 0,
 *      ...
 *      out[7, 0], out[7, 1], out[7, 2], ..., out[7, 13], 0, 0
 *      zeros are padded to guarantee that the distance between out[i, j] and out[i + 1, j] is ceil(length / 4) * 8 bytes
 * @param max_indice: stores the index of the maximum element of each batch of the output
 * @param max: stores the maximum value of each batch of the output
 * @param length: length of each input vector
 * @param batch: number of input vectors
 * @return 0 -- success
 * @return 1 -- length < 2
 * @return 2 -- batch is 0
 * @return 3 -- batch is not a multiple of 4
 */
uint8_t logsoftmax_int16(int16_t *input_, int16_t *output, uint16_t *max_indice, int16_t *max, uint16_t length, uint32_t batch);

/**
 * @}
 */

#ifdef __cplusplus
}
#endif

#endif