363 lines
27 KiB
C
Executable File
363 lines
27 KiB
C
Executable File
/****************************************************************************
|
|
|
|
Copyright(c) 2019 by Aerospace C.Power (Chongqing) Microelectronics. ALL RIGHTS RESERVED.
|
|
|
|
This Information is proprietary to Aerospace C.Power (Chongqing) Microelectronics and MAY NOT
|
|
be copied by any method or incorporated into another program without
|
|
the express written consent of Aerospace C.Power. This Information or any portion
|
|
thereof remains the property of Aerospace C.Power. The Information contained herein
|
|
is believed to be accurate and Aerospace C.Power assumes no responsibility or
|
|
liability for its use in any way and conveys no license or title under
|
|
any patent or copyright and makes no representation or warranty that this
|
|
Information is free from patent or copyright infringement.
|
|
|
|
****************************************************************************/
|
|
#ifndef NN_FUNCTION_H
|
|
#define NN_FUNCTION_H
|
|
/* os shim includes */
|
|
#include "os_types.h"
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/** \defgroup linear nn API
|
|
* @brief linear nn APIs
|
|
* Linear neural network functions, including FC(fully connect) and 1D conv
|
|
*/
|
|
|
|
/** @addtogroup linear_nn_APIs
|
|
* @{
|
|
*/
|
|
|
|
struct conv1d_config_bean {
|
|
uint32_t in_channel; // number of channel of input
|
|
uint32_t in_length; // length of input
|
|
uint32_t out_channel; // number of channel of output
|
|
uint32_t out_length; // length of output, which will be set after conv calculation is completed
|
|
uint32_t kernel_length; // length of conv kernel
|
|
uint32_t stride; // stride of conv
|
|
uint32_t dilation; // dilation of conv
|
|
uint32_t padding_left; // padding on the left
|
|
uint32_t padding_right; // padding on the right
|
|
uint32_t group; // group of the conv
|
|
};
|
|
|
|
/* @brief fc_int8_to_int8_weight_8bit() - fully connect operation as below:
|
|
output[i] = (sum(input[j] * weight[i, j]) + bias[i]) >> output_right_shift
|
|
where the sum is done on index j.
|
|
saturation included, i.e. output elements out of range [-128, 127] will be set to -128 or 127
|
|
* @param input: where the input, a vector, is put
|
|
* @param output: where the output, a vector, will be put
|
|
* @param weight: where the weight, a matrix of size out_length * in_length, is put
|
|
* weight should be arranged in the form below, as an example, we set in_length = 4, out_length = 20
|
|
* w[0, 0], w[1, 0], w[2, 0], ..., w[15, 0], w[0, 1], w[1, 1], w[2, 1], ..., w[15, 1], ..., w[0, 3], ..., w[15, 3],
|
|
* w[16, 0], w[17, 0], w[18, 0], w[19, 0], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
* w[16, 1], w[17, 1], w[18, 1], w[19, 1], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
* w[16, 2], w[17, 2], w[18, 2], w[19, 2], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
* w[16, 3], w[17, 3], w[18, 3], w[19, 3], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
* zeros are padded here to guarantee the distance between w[i, j] and w[i, j+1] in memory is 16 bytes
|
|
* @param bias: where the bias is put
|
|
* @param in_length: length of input vector
|
|
* @param out_length: length of output vector
|
|
* @param output_right_shift: the right shift number shown in the description of this function
|
|
* @return 0 -- succeed
|
|
* @return 1 -- in_length is 0
|
|
* @return 2 -- out_length is 0
|
|
*/
|
|
uint8_t fc_int8_to_int8_weight_8bit(int8_t *input, int8_t *output,
|
|
int8_t *weight, int32_t *bias, uint32_t in_length, uint32_t out_length,
|
|
uint8_t output_right_shift);
|
|
|
|
/* @brief fc_int8_to_int8_weight_8bit_bias_shift() - fully connect operation as below:
|
|
output[i] = (sum(input[j] * weight[i, j]) + bias[i] << bias_left_shift) >> output_right_shift
|
|
where the sum is done on index j.
|
|
saturation included, i.e. output elements out of range [-128, 127] will be set to -128 or 127
|
|
* @param input: where the input, a vector, is put
|
|
* @param output: where the output, a vector, will be put
|
|
* @param weight: where the weight, a matrix of size out_length * in_length, is put
|
|
* weight should be arranged in the form below, as an example, we set in_length = 4, out_length = 20
|
|
* w[0, 0], w[1, 0], w[2, 0], ..., w[15, 0], w[0, 1], w[1, 1], w[2, 1], ..., w[15, 1], ..., w[0, 3], ..., w[15, 3],
|
|
* w[16, 0], w[17, 0], w[18, 0], w[19, 0], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
* w[16, 1], w[17, 1], w[18, 1], w[19, 1], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
* w[16, 2], w[17, 2], w[18, 2], w[19, 2], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
* w[16, 3], w[17, 3], w[18, 3], w[19, 3], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
* zeros are padded here to guarantee the distance between w[i, j] and w[i, j+1] in memory is 16 bytes
|
|
* @param bias: where the bias is put
|
|
* @param in_length: length of input vector
|
|
* @param out_length: length of output vector
|
|
* @param output_right_shift: the right shift number shown in the description of this function
|
|
* @param bias_left_shift: the left shift number shown in the description of this function
|
|
* @return 0 -- succeed
|
|
* @return 1 -- in_length is 0
|
|
* @return 2 -- out_length is 0
|
|
*/
|
|
uint8_t fc_int8_to_int8_weight_8bit_bias_shift(int8_t *input, int8_t *output,
|
|
int8_t *weight, int8_t *bias, uint32_t in_length, uint32_t out_length,
|
|
uint8_t output_right_shift, uint8_t bias_left_shift);
|
|
|
|
/* @brief fc_int16_to_int16_weight_16bit() - fully connect operation as below:
|
|
output[i] = (sum(input[j] * weight[i, j]) + bias[i]) >> output_right_shift
|
|
where the sum is done on index j.
|
|
saturation included, i.e. output elements out of range [-32768, 32767] will be set to -32768 or 32767
|
|
* @param input: where the input, a vector, is put
|
|
* @param output: where the output, a vector, will be put
|
|
* @param weight: where the weight, a matrix of size out_length * in_length, is put
|
|
* weight should be arranged in the form below, as an example, we set in_length = 4, out_length = 10
|
|
* w[0, 0], w[1, 0], w[2, 0], ..., w[7, 0], w[0, 1], w[1, 1], w[2, 1], ..., w[7, 1], ..., w[0, 3], ..., w[7, 3],
|
|
* w[8, 0], w[9, 0], 0, 0, 0, 0, 0, 0,
|
|
* w[8, 1], w[9, 1], 0, 0, 0, 0, 0, 0,
|
|
* w[8, 2], w[9, 2], 0, 0, 0, 0, 0, 0,
|
|
* w[8, 3], w[9, 3], 0, 0, 0, 0, 0, 0,
|
|
* zeros are padded here to guarantee the distance between w[i, j] and w[i, j+1] in memory is 16 bytes
|
|
* @param bias: where the bias is put
|
|
* @param in_length: length of input vector
|
|
* @param out_length: length of output vector
|
|
* @param output_right_shift: the right shift number shown in the description of this function
|
|
* @param bias_left_shift: the left shift number shown in the description of this function
|
|
* @return 0 -- succeed
|
|
* @return 1 -- in_length is 0
|
|
* @return 2 -- out_length is 0
|
|
*/
|
|
uint8_t fc_int16_to_int16_weight_16bit(int16_t *input, int16_t *output,
|
|
int16_t *weight, int64_t *bias, uint32_t in_length, uint32_t out_length,
|
|
uint8_t output_right_shift);
|
|
|
|
/* @brief fc_int16_to_int16_weight_16bit_bias_shift() - fully connect operation as below:
|
|
output[i] = (sum(input[j] * weight[i, j]) + bias[i] << bias_left_shift) >> output_right_shift
|
|
where the sum is done on index j.
|
|
saturation included, i.e. output elements out of range [-32768, 32767] will be set to -32768 or 32767
|
|
* @param input: where the input, a vector, is put
|
|
* @param output: where the output, a vector, will be put
|
|
* @param weight: where the weight, a matrix of size out_length * in_length, is put
|
|
* weight should be arranged in the form below, as an example, we set in_length = 4, out_length = 20
|
|
* w[0, 0], w[1, 0], w[2, 0], ..., w[7, 0], w[0, 1], w[1, 1], w[2, 1], ..., w[7, 1], ..., w[0, 3], ..., w[7, 3],
|
|
* w[8, 0], w[9, 0], 0, 0, 0, 0, 0, 0,
|
|
* w[8, 1], w[9, 1], 0, 0, 0, 0, 0, 0,
|
|
* w[8, 2], w[9, 2], 0, 0, 0, 0, 0, 0,
|
|
* w[8, 3], w[9, 3], 0, 0, 0, 0, 0, 0,
|
|
* zeros are padded here to guarantee the distance between w[i, j] and w[i, j+1] in memory is 16 bytes
|
|
* @param bias: where the bias is put
|
|
* @param in_length: length of input vector
|
|
* @param out_length: length of output vector
|
|
* @param output_right_shift: the right shift number shown in the description of this function
|
|
* @param bias_left_shift: the left shift number shown in the description of this function
|
|
* @return 0 -- succeed
|
|
* @return 1 -- in_length is 0
|
|
* @return 2 -- out_length is 0
|
|
*/
|
|
uint8_t fc_int16_to_int16_weight_16bit_bias_shift(int16_t *input, int16_t *output,
|
|
int16_t *weight, int16_t *bias, uint32_t in_length, uint32_t out_length,
|
|
uint8_t output_right_shift, uint8_t bias_left_shift);
|
|
|
|
/* @brief conv1d_int8_to_int8_weight_8bit() - 1-dimension conv operation as below:
|
|
output[c, l] = (sum(input[i, l * stride + k * dilation - padding_left] * weight[c, i, k]) + bias[c]) >> output_right_shift
|
|
where the sum is done on index i and k.
|
|
saturation included, i.e. output elements out of range [-128, 127] will be set to -128 or 127
|
|
* @param in: where the input, a 2D-tensor of size [in_channel, in_length], is put
|
|
* input is arranged in the form below, as an example, we set in_channel = 28, in_length = 10
|
|
* input[0, 0], input[1, 0], input[2, 0], ..., input[27, 0], 0, 0, 0, 0,
|
|
* input[0, 1], input[1, 1], ..., input[27, 1], 0, 0, 0, 0, ...,
|
|
* input[0, 9], ..., input[27, 9], 0, 0, 0, 0
|
|
* zeros are padded so that the distance between input[i, j] and input[i, j + 1] is ceil(in_channel / 16.0) * 16 bytes
|
|
* @param out: where the output, a 2D-tensor, of size [out_channel, out_length] will be put, output will be arranged in the same form as input's
|
|
* where out_length = 1 + floor((in_length + padding_left + padding_right - (kernel_length - 1) * dilation - 1) / stride)
|
|
* @param weight: where the weight is put.
|
|
* if in_channel, out_channel, and group are the same, weight is a 3D-tensor of size [out_channel, 1, kernel_length].
|
|
* it should be arranged in the form below, as an example, we set kernel_length = 4, in_channel = 28, out_channel = 28
|
|
* w[0, 0, 0], w[1, 0, 0], ..., w[15, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[15, 0, 1], ..., w[0, 0, 3], ..., w[15, 0, 3],
|
|
* w[16, 0, 0], ..., w[27, 0, 0], 0, 0, 0, 0, w[16, 0, 1], ..., w[27, 0, 1], 0, 0, 0, 0, ..., w[16, 0, 3], ..., w[27, 0, 3], 0, 0, 0, 0
|
|
* zeros are padded so that the distance between w[i, 0, k] and w[i, 0, k + 1] is always 16 bytes
|
|
* otherwise, weight is a 3D-tensor of size [out_channel, in_channel / group, kernel_length],
|
|
* it could be divided by groups, in each group, it is a 3D-tensor of size [out_channel / group, in_channel / group, kernel_length]
|
|
* weight should be arranged in the form below, as an example, we set kernel_length = 2, in_channel = 4, out_channel = 56, group = 2
|
|
* (noticing in_channel / group = 2, out_channel / group = 28, so w[0:28, :, :] is for the 1st group, w[28:55, :, :] is for the 2nd group)
|
|
* w[0, 0, 0], w[1, 0, 0], ..., w[15, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[15, 0, 1], ..., w[0, 0, 3], ..., w[15, 0, 3],
|
|
* w[0, 1, 0], w[1, 1, 0], ..., w[15, 1, 0], w[0, 1, 1], w[1, 1, 1], ..., w[15, 1, 1], ..., w[0, 1, 3], ..., w[15, 1, 3],
|
|
* w[16, 0, 0], ..., w[27, 0, 0], 0, 0, 0, 0, w[16, 0, 1], ..., w[27, 0, 1], 0, 0, 0, 0, ..., w[16, 0, 3], ..., w[27, 0, 3], 0, 0, 0, 0
|
|
* w[16, 1, 0], ..., w[27, 1, 0], 0, 0, 0, 0, w[16, 1, 1], ..., w[27, 1, 1], 0, 0, 0, 0, ..., w[16, 1, 3], ..., w[27, 1, 3], 0, 0, 0, 0
|
|
* w[28, 0, 0], w[29, 0, 0], ..., w[43, 0, 0], w[28, 0, 1], w[29, 0, 1], ..., w[43, 0, 1], ..., w[28, 0, 3], ..., w[43, 0, 3],
|
|
* w[28, 1, 0], w[29, 1, 0], ..., w[43, 1, 0], w[28, 1, 1], w[29, 1, 1], ..., w[43, 1, 1], ..., w[28, 1, 3], ..., w[43, 1, 3],
|
|
* w[44, 0, 0], ..., w[55, 0, 0], 0, 0, 0, 0, w[44, 0, 1], ..., w[55, 0, 1], 0, 0, 0, 0, ..., w[44, 0, 3], ..., w[55, 0, 3], 0, 0, 0, 0
|
|
* w[44, 1, 0], ..., w[55, 1, 0], 0, 0, 0, 0, w[44, 1, 1], ..., w[55, 1, 1], 0, 0, 0, 0, ..., w[44, 1, 3], ..., w[55, 1, 3], 0, 0, 0, 0
|
|
* zeros are padded here to guarantee the distance between w[i, j, k] and w[i, j, k + 1] in memory is 16 bytes
|
|
* @param bias: where the bias is put, if bias is not needed, this pointer should be set to NULL
|
|
* @param bean: pointer to the config, whose description is shown in the beginning
|
|
* @param output_right_shift: the right shift number shown in the description of this function
|
|
* @return 0 -- succeed
|
|
* @return 1 -- in_channel is 0
|
|
* @return 2 -- out_channel is 0
|
|
* @return 3 -- in_length is 0
|
|
* @return 4 -- kernel_length is 0
|
|
* @return 5 -- stride is 0
|
|
* @return 6 -- dilation is 0
|
|
* @return 7 -- group is 0
|
|
* @return 8 -- in_channel % group is not 0
|
|
* @return 9 -- out_channel % group is not 0
|
|
* @return 10 -- after calculation, output_length is not a positive number
|
|
*/
|
|
uint8_t conv1d_int8_to_int8_weight_8bit(int8_t *in, int8_t *out,
|
|
int8_t *weight, int32_t *bias, struct conv1d_config_bean *bean,
|
|
uint8_t output_right_shift);
|
|
|
|
/* @brief conv1d_int8_to_int8_weight_8bit() - 1-dimension conv operation as below:
|
|
output[c, l] = (sum(input[i, l * stride + k * dilation - padding_left] * weight[c, i, k]) + bias[c] << bias_left_shift) >> output_right_shift
|
|
where the sum is done on index i and k.
|
|
saturation included, i.e. output elements out of range [-128, 127] will be set to -128 or 127
|
|
* @param in: where the input, a 2D-tensor of size [in_channel, in_length], is put
|
|
* input is arranged in the form below, as an example, we set in_channel = 28, in_length = 10
|
|
* input[0, 0], input[1, 0], input[2, 0], ..., input[27, 0], 0, 0, 0, 0,
|
|
* input[0, 1], input[1, 1], ..., input[27, 1], 0, 0, 0, 0, ...,
|
|
* input[0, 9], ..., input[27, 9], 0, 0, 0, 0
|
|
* zeros are padded so that the distance between input[i, j] and input[i, j + 1] is ceil(in_channel / 16.0) * 16 bytes
|
|
* @param out: where the output, a 2D-tensor, of size [out_channel, out_length] will be put, output will be arranged in the same form as input's
|
|
* where out_length = 1 + floor((in_length + padding_left + padding_right - (kernel_length - 1) * dilation - 1) / stride)
|
|
* @param weight: where the weight is put.
|
|
* if in_channel, out_channel, and group are the same, weight is a 3D-tensor of size [out_channel, 1, kernel_length].
|
|
* it should be arranged in the form below, as an example, we set kernel_length = 4, in_channel = 28, out_channel = 28
|
|
* w[0, 0, 0], w[1, 0, 0], ..., w[15, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[15, 0, 1], ..., w[0, 0, 3], ..., w[15, 0, 3],
|
|
* w[16, 0, 0], ..., w[27, 0, 0], 0, 0, 0, 0, w[16, 0, 1], ..., w[27, 0, 1], 0, 0, 0, 0, ..., w[16, 0, 3], ..., w[27, 0, 3], 0, 0, 0, 0
|
|
* zeros are padded so that the distance between w[i, 0, k] and w[i, 0, k + 1] is always 16 bytes
|
|
* otherwise, weight is a 3D-tensor of size [out_channel, in_channel / group, kernel_length],
|
|
* it could be divided by groups, in each group, it is a 3D-tensor of size [out_channel / group, in_channel / group, kernel_length]
|
|
* weight should be arranged in the form below, as an example, we set kernel_length = 2, in_channel = 4, out_channel = 56, group = 2
|
|
* (noticing in_channel / group = 2, out_channel / group = 28, so w[0:28, :, :] is for the 1st group, w[28:55, :, :] is for the 2nd group)
|
|
* w[0, 0, 0], w[1, 0, 0], ..., w[15, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[15, 0, 1], ..., w[0, 0, 3], ..., w[15, 0, 3],
|
|
* w[0, 1, 0], w[1, 1, 0], ..., w[15, 1, 0], w[0, 1, 1], w[1, 1, 1], ..., w[15, 1, 1], ..., w[0, 1, 3], ..., w[15, 1, 3],
|
|
* w[16, 0, 0], ..., w[27, 0, 0], 0, 0, 0, 0, w[16, 0, 1], ..., w[27, 0, 1], 0, 0, 0, 0, ..., w[16, 0, 3], ..., w[27, 0, 3], 0, 0, 0, 0
|
|
* w[16, 1, 0], ..., w[27, 1, 0], 0, 0, 0, 0, w[16, 1, 1], ..., w[27, 1, 1], 0, 0, 0, 0, ..., w[16, 1, 3], ..., w[27, 1, 3], 0, 0, 0, 0
|
|
* w[28, 0, 0], w[29, 0, 0], ..., w[43, 0, 0], w[28, 0, 1], w[29, 0, 1], ..., w[43, 0, 1], ..., w[28, 0, 3], ..., w[43, 0, 3],
|
|
* w[28, 1, 0], w[29, 1, 0], ..., w[43, 1, 0], w[28, 1, 1], w[29, 1, 1], ..., w[43, 1, 1], ..., w[28, 1, 3], ..., w[43, 1, 3],
|
|
* w[44, 0, 0], ..., w[55, 0, 0], 0, 0, 0, 0, w[44, 0, 1], ..., w[55, 0, 1], 0, 0, 0, 0, ..., w[44, 0, 3], ..., w[55, 0, 3], 0, 0, 0, 0
|
|
* w[44, 1, 0], ..., w[55, 1, 0], 0, 0, 0, 0, w[44, 1, 1], ..., w[55, 1, 1], 0, 0, 0, 0, ..., w[44, 1, 3], ..., w[55, 1, 3], 0, 0, 0, 0
|
|
* zeros are padded here to guarantee the distance between w[i, j, k] and w[i, j, k + 1] in memory is 16 bytes
|
|
* @param bias: where the bias is put, if bias is not needed, this pointer should be set to NULL
|
|
* @param bean: pointer to the config, whose description is shown in the beginning
|
|
* @param output_right_shift: the right shift number shown in the description of this function
|
|
* @param bias_left_shift: the left shift number shown in the description of this function
|
|
* @return 0 -- succeed
|
|
* @return 1 -- in_channel is 0
|
|
* @return 2 -- out_channel is 0
|
|
* @return 3 -- in_length is 0
|
|
* @return 4 -- kernel_length is 0
|
|
* @return 5 -- stride is 0
|
|
* @return 6 -- dilation is 0
|
|
* @return 7 -- group is 0
|
|
* @return 8 -- in_channel % group is not 0
|
|
* @return 9 -- out_channel % group is not 0
|
|
* @return 10 -- after calculation, output_length is not a positive number
|
|
*/
|
|
uint8_t conv1d_int8_to_int8_weight_8bit_bias_shift(int8_t *in, int8_t *out,
|
|
int8_t *weight, int8_t *bias, struct conv1d_config_bean *bean,
|
|
uint8_t output_right_shift, uint8_t bias_left_shift);
|
|
|
|
/* @brief conv1d_int16_to_int16_weight_16bit() - 1-dimension conv operation as below:
|
|
output[c, l] = (sum(input[i, l * stride + k * dilation - padding_left] * weight[c, i, k]) + bias[c]) >> output_right_shift
|
|
where the sum is done on index i and k.
|
|
saturation included, i.e. output elements out of range [-32768, 32767] will be set to -32768 or 32767
|
|
* @param in: where the input, a 2D-tensor of size [in_channel, in_length], is put
|
|
* input is arranged in the form below, as an example, we set in_channel = 14, in_length = 10
|
|
* input[0, 0], input[1, 0], input[2, 0], ..., input[13, 0], 0, 0,
|
|
* input[0, 1], input[1, 1], ..., input[13, 1], 0, 0, ...,
|
|
* input[0, 9], ..., input[13, 9], 0, 0
|
|
* zeros are padded so that the distance between input[i, j] and input[i, j + 1] is ceil(in_channel / 8.0) * 16 bytes
|
|
* @param out: where the output, a 2D-tensor, of size [out_channel, out_length] will be put, output will be arranged in the same form as input's
|
|
* where out_length = 1 + floor((in_length + padding_left + padding_right - (kernel_length - 1) * dilation - 1) / stride)
|
|
* @param weight: where the weight is put.
|
|
* if in_channel, out_channel, and group are the same, weight is a 3D-tensor of size [out_channel, 1, kernel_length].
|
|
* it should be arranged in the form below, as an example, we set kernel_length = 4, in_channel = 14, out_channel = 14
|
|
* w[0, 0, 0], w[1, 0, 0], ..., w[7, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[7, 0, 1], ..., w[0, 0, 3], ..., w[7, 0, 3],
|
|
* w[8, 0, 0], ..., w[13, 0, 0], 0, 0, 0, 0, w[8, 0, 1], ..., w[13, 0, 1], 0, 0, 0, 0, ..., w[8, 0, 3], ..., w[13, 0, 3], 0, 0, 0, 0
|
|
* zeros are padded so that the distance between w[i, 0, k] and w[i, 0, k + 1] is always 16 bytes
|
|
* otherwise, weight is a 3D-tensor of size [out_channel, in_channel / group, kernel_length],
|
|
* it could be divided by groups, in each group, it is a 3D-tensor of size [out_channel / group, in_channel / group, kernel_length]
|
|
* weight should be arranged in the form below, as an example, we set kernel_length = 2, in_channel = 4, out_channel = 28, group = 2
|
|
* (noticing in_channel / group = 2, out_channel / group = 14, so w[0:14, :, :] is for the 1st group, w[14:27, :, :] is for the 2nd group)
|
|
* w[0, 0, 0], w[1, 0, 0], ..., w[7, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[7, 0, 1], ..., w[0, 0, 3], ..., w[7, 0, 3],
|
|
* w[0, 1, 0], w[1, 1, 0], ..., w[7, 1, 0], w[0, 1, 1], w[1, 1, 1], ..., w[7, 1, 1], ..., w[0, 1, 3], ..., w[7, 1, 3],
|
|
* w[8, 0, 0], ..., w[13, 0, 0], 0, 0, w[8, 0, 1], ..., w[13, 0, 1], 0, 0, ..., w[8, 0, 3], ..., w[13, 0, 3], 0, 0
|
|
* w[8, 1, 0], ..., w[13, 1, 0], 0, 0, w[8, 1, 1], ..., w[13, 1, 1], 0, 0, ..., w[8, 1, 3], ..., w[13, 1, 3], 0, 0
|
|
* w[14, 0, 0], w[15, 0, 0], ..., w[21, 0, 0], w[14, 0, 1], w[15, 0, 1], ..., w[21, 0, 1], ..., w[14, 0, 3], ..., w[21, 0, 3],
|
|
* w[14, 1, 0], w[15, 1, 0], ..., w[21, 1, 0], w[14, 1, 1], w[15, 1, 1], ..., w[21, 1, 1], ..., w[14, 1, 3], ..., w[21, 1, 3],
|
|
* w[22, 0, 0], ..., w[27, 0, 0], 0, 0, w[22, 0, 1], ..., w[27, 0, 1], 0, 0, ..., w[22, 0, 3], ..., w[27, 0, 3], 0, 0
|
|
* w[22, 1, 0], ..., w[27, 1, 0], 0, 0, w[22, 1, 1], ..., w[27, 1, 1], 0, 0, ..., w[22, 1, 3], ..., w[27, 1, 3], 0, 0
|
|
* zeros are padded here to guarantee the distance between w[i, j, k] and w[i, j, k + 1] in memory is 16 bytes
|
|
* @param bias: where the bias is put, if bias is not needed, this pointer should be set to NULL
|
|
* @param bean: pointer to the config, whose description is shown in the beginning
|
|
* @param output_right_shift: the right shift number shown in the description of this function
|
|
* @return 0 -- succeed
|
|
* @return 1 -- in_channel is 0
|
|
* @return 2 -- out_channel is 0
|
|
* @return 3 -- in_length is 0
|
|
* @return 4 -- kernel_length is 0
|
|
* @return 5 -- stride is 0
|
|
* @return 6 -- dilation is 0
|
|
* @return 7 -- group is 0
|
|
* @return 8 -- in_channel % group is not 0
|
|
* @return 9 -- out_channel % group is not 0
|
|
* @return 10 -- after calculation, output_length is not a positive number
|
|
*/
|
|
uint8_t conv1d_int16_to_int16_weight_16bit(int16_t *in, int16_t *out,
|
|
int16_t *weight, int64_t *bias, struct conv1d_config_bean *bean,
|
|
uint8_t output_right_shift);
|
|
|
|
/* @brief conv1d_int16_to_int16_weight_16bit_bias_shift() - 1-dimension conv operation as below:
|
|
output[c, l] = (sum(input[i, l * stride + k * dilation - padding_left] * weight[c, i, k]) + bias[c] << bias_left_shift) >> output_right_shift
|
|
where the sum is done on index i and k.
|
|
saturation included, i.e. output elements out of range [-32768, 32767] will be set to -32768 or 32767
|
|
* @param in: where the input, a 2D-tensor of size [in_channel, in_length], is put
|
|
* input is arranged in the form below, as an example, we set in_channel = 14, in_length = 10
|
|
* input[0, 0], input[1, 0], input[2, 0], ..., input[13, 0], 0, 0,
|
|
* input[0, 1], input[1, 1], ..., input[13, 1], 0, 0, ...,
|
|
* input[0, 9], ..., input[13, 9], 0, 0
|
|
* zeros are padded so that the distance between input[i, j] and input[i, j + 1] is ceil(in_channel / 8.0) * 16 bytes
|
|
* @param out: where the output, a 2D-tensor, of size [out_channel, out_length] will be put, output will be arranged in the same form as input's
|
|
* where out_length = 1 + floor((in_length + padding_left + padding_right - (kernel_length - 1) * dilation - 1) / stride)
|
|
* @param weight: where the weight is put.
|
|
* if in_channel, out_channel, and group are the same, weight is a 3D-tensor of size [out_channel, 1, kernel_length].
|
|
* it should be arranged in the form below, as an example, we set kernel_length = 4, in_channel = 14, out_channel = 14
|
|
* w[0, 0, 0], w[1, 0, 0], ..., w[7, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[7, 0, 1], ..., w[0, 0, 3], ..., w[7, 0, 3],
|
|
* w[8, 0, 0], ..., w[13, 0, 0], 0, 0, 0, 0, w[8, 0, 1], ..., w[13, 0, 1], 0, 0, 0, 0, ..., w[8, 0, 3], ..., w[13, 0, 3], 0, 0, 0, 0
|
|
* zeros are padded so that the distance between w[i, 0, k] and w[i, 0, k + 1] is always 16 bytes
|
|
* otherwise, weight is a 3D-tensor of size [out_channel, in_channel / group, kernel_length],
|
|
* it could be divided by groups, in each group, it is a 3D-tensor of size [out_channel / group, in_channel / group, kernel_length]
|
|
* weight should be arranged in the form below, as an example, we set kernel_length = 2, in_channel = 4, out_channel = 28, group = 2
|
|
* (noticing in_channel / group = 2, out_channel / group = 14, so w[0:14, :, :] is for the 1st group, w[14:27, :, :] is for the 2nd group)
|
|
* w[0, 0, 0], w[1, 0, 0], ..., w[7, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[7, 0, 1], ..., w[0, 0, 3], ..., w[7, 0, 3],
|
|
* w[0, 1, 0], w[1, 1, 0], ..., w[7, 1, 0], w[0, 1, 1], w[1, 1, 1], ..., w[7, 1, 1], ..., w[0, 1, 3], ..., w[7, 1, 3],
|
|
* w[8, 0, 0], ..., w[13, 0, 0], 0, 0, w[8, 0, 1], ..., w[13, 0, 1], 0, 0, ..., w[8, 0, 3], ..., w[13, 0, 3], 0, 0
|
|
* w[8, 1, 0], ..., w[13, 1, 0], 0, 0, w[8, 1, 1], ..., w[13, 1, 1], 0, 0, ..., w[8, 1, 3], ..., w[13, 1, 3], 0, 0
|
|
* w[14, 0, 0], w[15, 0, 0], ..., w[21, 0, 0], w[14, 0, 1], w[15, 0, 1], ..., w[21, 0, 1], ..., w[14, 0, 3], ..., w[21, 0, 3],
|
|
* w[14, 1, 0], w[15, 1, 0], ..., w[21, 1, 0], w[14, 1, 1], w[15, 1, 1], ..., w[21, 1, 1], ..., w[14, 1, 3], ..., w[21, 1, 3],
|
|
* w[22, 0, 0], ..., w[27, 0, 0], 0, 0, w[22, 0, 1], ..., w[27, 0, 1], 0, 0, ..., w[22, 0, 3], ..., w[27, 0, 3], 0, 0
|
|
* w[22, 1, 0], ..., w[27, 1, 0], 0, 0, w[22, 1, 1], ..., w[27, 1, 1], 0, 0, ..., w[22, 1, 3], ..., w[27, 1, 3], 0, 0
|
|
* zeros are padded here to guarantee the distance between w[i, j, k] and w[i, j, k + 1] in memory is 16 bytes
|
|
* @param bias: where the bias is put, if bias is not needed, this pointer should be set to NULL
|
|
* @param bean: pointer to the config, whose description is shown in the beginning
|
|
* @param output_right_shift: the right shift number shown in the description of this function
|
|
* @param bias_left_shift: the left shift number shown in the description of this function
|
|
* @return 0 -- succeed
|
|
* @return 1 -- in_channel is 0
|
|
* @return 2 -- out_channel is 0
|
|
* @return 3 -- in_length is 0
|
|
* @return 4 -- kernel_length is 0
|
|
* @return 5 -- stride is 0
|
|
* @return 6 -- dilation is 0
|
|
* @return 7 -- group is 0
|
|
* @return 8 -- in_channel % group is not 0
|
|
* @return 9 -- out_channel % group is not 0
|
|
* @return 10 -- after calculation, output_length is not a positive number
|
|
*/
|
|
uint8_t conv1d_int16_to_int16_weight_16bit_bias_shift(int16_t *in, int16_t *out,
|
|
int16_t *weight, int16_t *bias, struct conv1d_config_bean *bean,
|
|
uint8_t output_right_shift, uint8_t bias_left_shift);
|
|
|
|
/**
|
|
* @}
|
|
*/
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif |