/****************************************************************************

Copyright(c) 2019 by Aerospace C.Power (Chongqing) Microelectronics. ALL RIGHTS RESERVED.

This Information is proprietary to Aerospace C.Power (Chongqing) Microelectronics and MAY NOT
be copied by any method or incorporated into another program without
the express written consent of Aerospace C.Power. This Information or any portion
thereof remains the property of Aerospace C.Power. The Information contained herein
is believed to be accurate and Aerospace C.Power assumes no responsibility or
liability for its use in any way and conveys no license or title under
any patent or copyright and makes no representation or warranty that this
Information is free from patent or copyright infringement.

****************************************************************************/
#ifndef NN_FUNCTION_H
#define NN_FUNCTION_H

/* os shim includes */
#include "os_types.h"

#ifdef __cplusplus
extern "C" {
#endif

/** \defgroup linear_nn_APIs Linear NN API
  * @brief Linear NN APIs
  * Linear neural network functions, including FC (fully connected) and 1D convolution.
  */

/** @addtogroup linear_nn_APIs
  * @{
  */

struct conv1d_config_bean {
    uint32_t in_channel; // number of input channels
    uint32_t in_length; // length of the input
    uint32_t out_channel; // number of output channels
    uint32_t out_length; // length of the output, set after the conv calculation is completed
    uint32_t kernel_length; // length of the conv kernel
    uint32_t stride; // stride of the conv
    uint32_t dilation; // dilation of the conv
    uint32_t padding_left; // padding on the left
    uint32_t padding_right; // padding on the right
    uint32_t group; // number of groups of the conv
};
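
/*
 * Illustrative sketch (not part of the original header): one plausible way to
 * fill a conv1d_config_bean before calling one of the conv1d_* functions below.
 * All numeric values are example choices, not requirements of the API.
 *
 *     struct conv1d_config_bean bean = {0};
 *     bean.in_channel    = 28;
 *     bean.in_length     = 10;
 *     bean.out_channel   = 28;
 *     bean.kernel_length = 4;
 *     bean.stride        = 1;
 *     bean.dilation      = 1;
 *     bean.padding_left  = 0;
 *     bean.padding_right = 0;
 *     bean.group         = 1;
 *     // bean.out_length is filled in by the conv1d_* functions; per their docs it becomes
 *     // 1 + (in_length + padding_left + padding_right - (kernel_length - 1) * dilation - 1) / stride
 */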

/** @brief fc_int8_to_int8_weight_8bit() - fully connected operation as below:
 *        output[i] = (sum(input[j] * weight[i, j]) + bias[i]) >> output_right_shift
 *        where the sum is taken over index j.
 *        Saturation is included, i.e. output elements outside the range [-128, 127] are clamped to -128 or 127.
 * @param input: where the input vector is stored
 * @param output: where the output vector will be written
 * @param weight: where the weight, a matrix of size out_length * in_length, is stored
 *                weight should be arranged in the form below; as an example, with in_length = 4 and out_length = 20:
 *                w[0, 0], w[1, 0], w[2, 0], ..., w[15, 0], w[0, 1], w[1, 1], w[2, 1], ..., w[15, 1], ..., w[0, 3], ..., w[15, 3],
 *                w[16, 0], w[17, 0], w[18, 0], w[19, 0], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 *                w[16, 1], w[17, 1], w[18, 1], w[19, 1], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 *                w[16, 2], w[17, 2], w[18, 2], w[19, 2], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 *                w[16, 3], w[17, 3], w[18, 3], w[19, 3], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 *                zeros are padded here to guarantee that the distance in memory between w[i, j] and w[i, j+1] is 16 bytes
 * @param bias: where the bias vector is stored
 * @param in_length: length of the input vector
 * @param out_length: length of the output vector
 * @param output_right_shift: the right-shift amount shown in the description of this function
 * @return 0 -- success
 * @return 1 -- in_length is 0
 * @return 2 -- out_length is 0
 */
uint8_t fc_int8_to_int8_weight_8bit(int8_t *input, int8_t *output,
    int8_t *weight, int32_t *bias, uint32_t in_length, uint32_t out_length,
    uint8_t output_right_shift);
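
/*
 * Usage sketch (added illustration; buffer sizes are assumptions derived from the
 * padded weight layout described above, with in_length = 4 and out_length = 20).
 * The actual weight/bias values would come from a trained, quantized model.
 *
 *     #define FC_IN_LEN   4u
 *     #define FC_OUT_LEN  20u
 *
 *     // the padded weight buffer holds ceil(out_length / 16.0) * 16 * in_length int8 values
 *     static int8_t  fc_weight[((FC_OUT_LEN + 15u) / 16u) * 16u * FC_IN_LEN]; // zero-padded as described
 *     static int32_t fc_bias[FC_OUT_LEN];
 *     static int8_t  fc_input[FC_IN_LEN];
 *     static int8_t  fc_output[FC_OUT_LEN];
 *
 *     uint8_t err = fc_int8_to_int8_weight_8bit(fc_input, fc_output, fc_weight,
 *                                               fc_bias, FC_IN_LEN, FC_OUT_LEN,
 *                                               7); // example output_right_shift
 *     if (err != 0) {
 *         // 1: in_length is 0, 2: out_length is 0
 *     }
 */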

/** @brief fc_int8_to_int8_weight_8bit_bias_shift() - fully connected operation as below:
 *        output[i] = (sum(input[j] * weight[i, j]) + (bias[i] << bias_left_shift)) >> output_right_shift
 *        where the sum is taken over index j.
 *        Saturation is included, i.e. output elements outside the range [-128, 127] are clamped to -128 or 127.
 * @param input: where the input vector is stored
 * @param output: where the output vector will be written
 * @param weight: where the weight, a matrix of size out_length * in_length, is stored
 *                weight should be arranged in the form below; as an example, with in_length = 4 and out_length = 20:
 *                w[0, 0], w[1, 0], w[2, 0], ..., w[15, 0], w[0, 1], w[1, 1], w[2, 1], ..., w[15, 1], ..., w[0, 3], ..., w[15, 3],
 *                w[16, 0], w[17, 0], w[18, 0], w[19, 0], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 *                w[16, 1], w[17, 1], w[18, 1], w[19, 1], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 *                w[16, 2], w[17, 2], w[18, 2], w[19, 2], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 *                w[16, 3], w[17, 3], w[18, 3], w[19, 3], 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 *                zeros are padded here to guarantee that the distance in memory between w[i, j] and w[i, j+1] is 16 bytes
 * @param bias: where the bias vector is stored
 * @param in_length: length of the input vector
 * @param out_length: length of the output vector
 * @param output_right_shift: the right-shift amount shown in the description of this function
 * @param bias_left_shift: the left-shift amount shown in the description of this function
 * @return 0 -- success
 * @return 1 -- in_length is 0
 * @return 2 -- out_length is 0
 */
uint8_t fc_int8_to_int8_weight_8bit_bias_shift(int8_t *input, int8_t *output,
    int8_t *weight, int8_t *bias, uint32_t in_length, uint32_t out_length,
    uint8_t output_right_shift, uint8_t bias_left_shift);
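
/*
 * Usage sketch (added illustration): same call pattern as fc_int8_to_int8_weight_8bit
 * in the sketch above, except the bias is stored as int8_t and is left-shifted by
 * bias_left_shift before being added. The shift amounts are example values only.
 *
 *     static int8_t fc_bias8[FC_OUT_LEN];
 *
 *     uint8_t err = fc_int8_to_int8_weight_8bit_bias_shift(fc_input, fc_output,
 *         fc_weight, fc_bias8, FC_IN_LEN, FC_OUT_LEN,
 *         7,  // output_right_shift
 *         4); // bias_left_shift
 */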

/** @brief fc_int16_to_int16_weight_16bit() - fully connected operation as below:
 *        output[i] = (sum(input[j] * weight[i, j]) + bias[i]) >> output_right_shift
 *        where the sum is taken over index j.
 *        Saturation is included, i.e. output elements outside the range [-32768, 32767] are clamped to -32768 or 32767.
 * @param input: where the input vector is stored
 * @param output: where the output vector will be written
 * @param weight: where the weight, a matrix of size out_length * in_length, is stored
 *                weight should be arranged in the form below; as an example, with in_length = 4 and out_length = 10:
 *                w[0, 0], w[1, 0], w[2, 0], ..., w[7, 0], w[0, 1], w[1, 1], w[2, 1], ..., w[7, 1], ..., w[0, 3], ..., w[7, 3],
 *                w[8, 0], w[9, 0], 0, 0, 0, 0, 0, 0,
 *                w[8, 1], w[9, 1], 0, 0, 0, 0, 0, 0,
 *                w[8, 2], w[9, 2], 0, 0, 0, 0, 0, 0,
 *                w[8, 3], w[9, 3], 0, 0, 0, 0, 0, 0,
 *                zeros are padded here to guarantee that the distance in memory between w[i, j] and w[i, j+1] is 16 bytes
 * @param bias: where the bias vector is stored
 * @param in_length: length of the input vector
 * @param out_length: length of the output vector
 * @param output_right_shift: the right-shift amount shown in the description of this function
 * @return 0 -- success
 * @return 1 -- in_length is 0
 * @return 2 -- out_length is 0
 */
uint8_t fc_int16_to_int16_weight_16bit(int16_t *input, int16_t *output,
    int16_t *weight, int64_t *bias, uint32_t in_length, uint32_t out_length,
    uint8_t output_right_shift);
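
/*
 * Usage sketch (added illustration; sizes are assumptions derived from the layout
 * above): the 16-bit variant packs 8 int16 weights into each 16-byte row, so with
 * in_length = 4 and out_length = 10 the padded weight buffer holds
 * ceil(out_length / 8.0) * 8 * in_length int16 values.
 *
 *     #define FC16_IN_LEN   4u
 *     #define FC16_OUT_LEN  10u
 *
 *     static int16_t fc16_weight[((FC16_OUT_LEN + 7u) / 8u) * 8u * FC16_IN_LEN]; // zero-padded
 *     static int64_t fc16_bias[FC16_OUT_LEN];
 *     static int16_t fc16_input[FC16_IN_LEN];
 *     static int16_t fc16_output[FC16_OUT_LEN];
 *
 *     uint8_t err = fc_int16_to_int16_weight_16bit(fc16_input, fc16_output,
 *         fc16_weight, fc16_bias, FC16_IN_LEN, FC16_OUT_LEN,
 *         12); // example output_right_shift
 */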

/** @brief fc_int16_to_int16_weight_16bit_bias_shift() - fully connected operation as below:
 *        output[i] = (sum(input[j] * weight[i, j]) + (bias[i] << bias_left_shift)) >> output_right_shift
 *        where the sum is taken over index j.
 *        Saturation is included, i.e. output elements outside the range [-32768, 32767] are clamped to -32768 or 32767.
 * @param input: where the input vector is stored
 * @param output: where the output vector will be written
 * @param weight: where the weight, a matrix of size out_length * in_length, is stored
 *                weight should be arranged in the form below; as an example, with in_length = 4 and out_length = 10:
 *                w[0, 0], w[1, 0], w[2, 0], ..., w[7, 0], w[0, 1], w[1, 1], w[2, 1], ..., w[7, 1], ..., w[0, 3], ..., w[7, 3],
 *                w[8, 0], w[9, 0], 0, 0, 0, 0, 0, 0,
 *                w[8, 1], w[9, 1], 0, 0, 0, 0, 0, 0,
 *                w[8, 2], w[9, 2], 0, 0, 0, 0, 0, 0,
 *                w[8, 3], w[9, 3], 0, 0, 0, 0, 0, 0,
 *                zeros are padded here to guarantee that the distance in memory between w[i, j] and w[i, j+1] is 16 bytes
 * @param bias: where the bias vector is stored
 * @param in_length: length of the input vector
 * @param out_length: length of the output vector
 * @param output_right_shift: the right-shift amount shown in the description of this function
 * @param bias_left_shift: the left-shift amount shown in the description of this function
 * @return 0 -- success
 * @return 1 -- in_length is 0
 * @return 2 -- out_length is 0
 */
uint8_t fc_int16_to_int16_weight_16bit_bias_shift(int16_t *input, int16_t *output,
    int16_t *weight, int16_t *bias, uint32_t in_length, uint32_t out_length,
    uint8_t output_right_shift, uint8_t bias_left_shift);
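
/*
 * Usage sketch (added illustration): same call pattern as fc_int16_to_int16_weight_16bit
 * in the sketch above, except the bias is stored as int16_t and is left-shifted by
 * bias_left_shift before being added. The shift amounts are example values only.
 *
 *     static int16_t fc16_bias16[FC16_OUT_LEN];
 *
 *     uint8_t err = fc_int16_to_int16_weight_16bit_bias_shift(fc16_input, fc16_output,
 *         fc16_weight, fc16_bias16, FC16_IN_LEN, FC16_OUT_LEN,
 *         12, // output_right_shift
 *         6); // bias_left_shift
 */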

/** @brief conv1d_int8_to_int8_weight_8bit() - 1-dimensional convolution operation as below:
 *        output[c, l] = (sum(input[i, l * stride + k * dilation - padding_left] * weight[c, i, k]) + bias[c]) >> output_right_shift
 *        where the sum is taken over indices i and k.
 *        Saturation is included, i.e. output elements outside the range [-128, 127] are clamped to -128 or 127.
 * @param in: where the input, a 2D tensor of size [in_channel, in_length], is stored
 *            input is arranged in the form below; as an example, with in_channel = 28 and in_length = 10:
 *            input[0, 0], input[1, 0], input[2, 0], ..., input[27, 0], 0, 0, 0, 0,
 *            input[0, 1], input[1, 1], ..., input[27, 1], 0, 0, 0, 0, ...,
 *            input[0, 9], ..., input[27, 9], 0, 0, 0, 0
 *            zeros are padded so that the distance between input[i, j] and input[i, j + 1] is ceil(in_channel / 16.0) * 16 bytes
 * @param out: where the output, a 2D tensor of size [out_channel, out_length], will be written; it is arranged in the same form as the input,
 *             where out_length = 1 + floor((in_length + padding_left + padding_right - (kernel_length - 1) * dilation - 1) / stride)
 * @param weight: where the weight is stored.
 *                If in_channel, out_channel, and group are all equal, weight is a 3D tensor of size [out_channel, 1, kernel_length].
 *                It should be arranged in the form below; as an example, with kernel_length = 4, in_channel = 28, out_channel = 28:
 *                w[0, 0, 0], w[1, 0, 0], ..., w[15, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[15, 0, 1], ..., w[0, 0, 3], ..., w[15, 0, 3],
 *                w[16, 0, 0], ..., w[27, 0, 0], 0, 0, 0, 0, w[16, 0, 1], ..., w[27, 0, 1], 0, 0, 0, 0, ..., w[16, 0, 3], ..., w[27, 0, 3], 0, 0, 0, 0
 *                zeros are padded so that the distance between w[i, 0, k] and w[i, 0, k + 1] is always 16 bytes.
 *                Otherwise, weight is a 3D tensor of size [out_channel, in_channel / group, kernel_length];
 *                it can be divided into groups, and within each group it is a 3D tensor of size [out_channel / group, in_channel / group, kernel_length].
 *                weight should be arranged in the form below; as an example, with kernel_length = 4, in_channel = 4, out_channel = 56, group = 2
 *                (note that in_channel / group = 2 and out_channel / group = 28, so output channels 0..27 form the 1st group and output channels 28..55 form the 2nd group):
 *                w[0, 0, 0], w[1, 0, 0], ..., w[15, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[15, 0, 1], ..., w[0, 0, 3], ..., w[15, 0, 3],
 *                w[0, 1, 0], w[1, 1, 0], ..., w[15, 1, 0], w[0, 1, 1], w[1, 1, 1], ..., w[15, 1, 1], ..., w[0, 1, 3], ..., w[15, 1, 3],
 *                w[16, 0, 0], ..., w[27, 0, 0], 0, 0, 0, 0, w[16, 0, 1], ..., w[27, 0, 1], 0, 0, 0, 0, ..., w[16, 0, 3], ..., w[27, 0, 3], 0, 0, 0, 0
 *                w[16, 1, 0], ..., w[27, 1, 0], 0, 0, 0, 0, w[16, 1, 1], ..., w[27, 1, 1], 0, 0, 0, 0, ..., w[16, 1, 3], ..., w[27, 1, 3], 0, 0, 0, 0
 *                w[28, 0, 0], w[29, 0, 0], ..., w[43, 0, 0], w[28, 0, 1], w[29, 0, 1], ..., w[43, 0, 1], ..., w[28, 0, 3], ..., w[43, 0, 3],
 *                w[28, 1, 0], w[29, 1, 0], ..., w[43, 1, 0], w[28, 1, 1], w[29, 1, 1], ..., w[43, 1, 1], ..., w[28, 1, 3], ..., w[43, 1, 3],
 *                w[44, 0, 0], ..., w[55, 0, 0], 0, 0, 0, 0, w[44, 0, 1], ..., w[55, 0, 1], 0, 0, 0, 0, ..., w[44, 0, 3], ..., w[55, 0, 3], 0, 0, 0, 0
 *                w[44, 1, 0], ..., w[55, 1, 0], 0, 0, 0, 0, w[44, 1, 1], ..., w[55, 1, 1], 0, 0, 0, 0, ..., w[44, 1, 3], ..., w[55, 1, 3], 0, 0, 0, 0
 *                zeros are padded here to guarantee that the distance in memory between w[i, j, k] and w[i, j, k + 1] is 16 bytes
 * @param bias: where the bias is stored; if no bias is needed, this pointer should be set to NULL
 * @param bean: pointer to the config struct described at the beginning of this file
 * @param output_right_shift: the right-shift amount shown in the description of this function
 * @return 0 -- success
 * @return 1 -- in_channel is 0
 * @return 2 -- out_channel is 0
 * @return 3 -- in_length is 0
 * @return 4 -- kernel_length is 0
 * @return 5 -- stride is 0
 * @return 6 -- dilation is 0
 * @return 7 -- group is 0
 * @return 8 -- in_channel % group is not 0
 * @return 9 -- out_channel % group is not 0
 * @return 10 -- the computed out_length is not a positive number
 */
uint8_t conv1d_int8_to_int8_weight_8bit(int8_t *in, int8_t *out,
    int8_t *weight, int32_t *bias, struct conv1d_config_bean *bean,
    uint8_t output_right_shift);
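
/*
 * Usage sketch (added illustration; buffer sizes are assumptions derived from the
 * padded layouts described above): a depthwise-style call with
 * in_channel = out_channel = group = 28, kernel_length = 4 and in_length = 10.
 *
 *     enum { CONV_C = 28, CONV_L = 10, CONV_K = 4,
 *            CONV_C_PAD = ((CONV_C + 15) / 16) * 16 }; // channels padded to a multiple of 16
 *
 *     static int8_t  conv_in[CONV_C_PAD * CONV_L];     // zero-padded per time step
 *     static int8_t  conv_out[CONV_C_PAD * CONV_L];    // out_length <= in_length for this config
 *     static int8_t  conv_weight[CONV_C_PAD * CONV_K]; // depthwise weights, zero-padded per tap
 *     static int32_t conv_bias[CONV_C];                // or pass NULL when no bias is needed
 *
 *     struct conv1d_config_bean bean = {0};
 *     bean.in_channel = CONV_C;  bean.in_length = CONV_L;
 *     bean.out_channel = CONV_C; bean.kernel_length = CONV_K;
 *     bean.stride = 1; bean.dilation = 1;
 *     bean.padding_left = 0; bean.padding_right = 0;
 *     bean.group = CONV_C; // in_channel == out_channel == group -> depthwise layout
 *
 *     uint8_t err = conv1d_int8_to_int8_weight_8bit(conv_in, conv_out, conv_weight,
 *                                                   conv_bias, &bean, 7);
 *     // on success, bean.out_length == 1 + (10 - (4 - 1) - 1) / 1 == 7
 */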

/** @brief conv1d_int8_to_int8_weight_8bit_bias_shift() - 1-dimensional convolution operation as below:
 *        output[c, l] = (sum(input[i, l * stride + k * dilation - padding_left] * weight[c, i, k]) + (bias[c] << bias_left_shift)) >> output_right_shift
 *        where the sum is taken over indices i and k.
 *        Saturation is included, i.e. output elements outside the range [-128, 127] are clamped to -128 or 127.
 * @param in: where the input, a 2D tensor of size [in_channel, in_length], is stored
 *            input is arranged in the form below; as an example, with in_channel = 28 and in_length = 10:
 *            input[0, 0], input[1, 0], input[2, 0], ..., input[27, 0], 0, 0, 0, 0,
 *            input[0, 1], input[1, 1], ..., input[27, 1], 0, 0, 0, 0, ...,
 *            input[0, 9], ..., input[27, 9], 0, 0, 0, 0
 *            zeros are padded so that the distance between input[i, j] and input[i, j + 1] is ceil(in_channel / 16.0) * 16 bytes
 * @param out: where the output, a 2D tensor of size [out_channel, out_length], will be written; it is arranged in the same form as the input,
 *             where out_length = 1 + floor((in_length + padding_left + padding_right - (kernel_length - 1) * dilation - 1) / stride)
 * @param weight: where the weight is stored.
 *                If in_channel, out_channel, and group are all equal, weight is a 3D tensor of size [out_channel, 1, kernel_length].
 *                It should be arranged in the form below; as an example, with kernel_length = 4, in_channel = 28, out_channel = 28:
 *                w[0, 0, 0], w[1, 0, 0], ..., w[15, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[15, 0, 1], ..., w[0, 0, 3], ..., w[15, 0, 3],
 *                w[16, 0, 0], ..., w[27, 0, 0], 0, 0, 0, 0, w[16, 0, 1], ..., w[27, 0, 1], 0, 0, 0, 0, ..., w[16, 0, 3], ..., w[27, 0, 3], 0, 0, 0, 0
 *                zeros are padded so that the distance between w[i, 0, k] and w[i, 0, k + 1] is always 16 bytes.
 *                Otherwise, weight is a 3D tensor of size [out_channel, in_channel / group, kernel_length];
 *                it can be divided into groups, and within each group it is a 3D tensor of size [out_channel / group, in_channel / group, kernel_length].
 *                weight should be arranged in the form below; as an example, with kernel_length = 4, in_channel = 4, out_channel = 56, group = 2
 *                (note that in_channel / group = 2 and out_channel / group = 28, so output channels 0..27 form the 1st group and output channels 28..55 form the 2nd group):
 *                w[0, 0, 0], w[1, 0, 0], ..., w[15, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[15, 0, 1], ..., w[0, 0, 3], ..., w[15, 0, 3],
 *                w[0, 1, 0], w[1, 1, 0], ..., w[15, 1, 0], w[0, 1, 1], w[1, 1, 1], ..., w[15, 1, 1], ..., w[0, 1, 3], ..., w[15, 1, 3],
 *                w[16, 0, 0], ..., w[27, 0, 0], 0, 0, 0, 0, w[16, 0, 1], ..., w[27, 0, 1], 0, 0, 0, 0, ..., w[16, 0, 3], ..., w[27, 0, 3], 0, 0, 0, 0
 *                w[16, 1, 0], ..., w[27, 1, 0], 0, 0, 0, 0, w[16, 1, 1], ..., w[27, 1, 1], 0, 0, 0, 0, ..., w[16, 1, 3], ..., w[27, 1, 3], 0, 0, 0, 0
 *                w[28, 0, 0], w[29, 0, 0], ..., w[43, 0, 0], w[28, 0, 1], w[29, 0, 1], ..., w[43, 0, 1], ..., w[28, 0, 3], ..., w[43, 0, 3],
 *                w[28, 1, 0], w[29, 1, 0], ..., w[43, 1, 0], w[28, 1, 1], w[29, 1, 1], ..., w[43, 1, 1], ..., w[28, 1, 3], ..., w[43, 1, 3],
 *                w[44, 0, 0], ..., w[55, 0, 0], 0, 0, 0, 0, w[44, 0, 1], ..., w[55, 0, 1], 0, 0, 0, 0, ..., w[44, 0, 3], ..., w[55, 0, 3], 0, 0, 0, 0
 *                w[44, 1, 0], ..., w[55, 1, 0], 0, 0, 0, 0, w[44, 1, 1], ..., w[55, 1, 1], 0, 0, 0, 0, ..., w[44, 1, 3], ..., w[55, 1, 3], 0, 0, 0, 0
 *                zeros are padded here to guarantee that the distance in memory between w[i, j, k] and w[i, j, k + 1] is 16 bytes
 * @param bias: where the bias is stored; if no bias is needed, this pointer should be set to NULL
 * @param bean: pointer to the config struct described at the beginning of this file
 * @param output_right_shift: the right-shift amount shown in the description of this function
 * @param bias_left_shift: the left-shift amount shown in the description of this function
 * @return 0 -- success
 * @return 1 -- in_channel is 0
 * @return 2 -- out_channel is 0
 * @return 3 -- in_length is 0
 * @return 4 -- kernel_length is 0
 * @return 5 -- stride is 0
 * @return 6 -- dilation is 0
 * @return 7 -- group is 0
 * @return 8 -- in_channel % group is not 0
 * @return 9 -- out_channel % group is not 0
 * @return 10 -- the computed out_length is not a positive number
 */
uint8_t conv1d_int8_to_int8_weight_8bit_bias_shift(int8_t *in, int8_t *out,
    int8_t *weight, int8_t *bias, struct conv1d_config_bean *bean,
    uint8_t output_right_shift, uint8_t bias_left_shift);
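
/*
 * Usage sketch (added illustration): same call pattern as conv1d_int8_to_int8_weight_8bit
 * in the sketch above, except the bias is stored as int8_t and is left-shifted by
 * bias_left_shift before being added. The shift amounts are example values only.
 *
 *     static int8_t conv_bias8[CONV_C];
 *
 *     uint8_t err = conv1d_int8_to_int8_weight_8bit_bias_shift(conv_in, conv_out,
 *         conv_weight, conv_bias8, &bean,
 *         7,  // output_right_shift
 *         4); // bias_left_shift
 */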

/** @brief conv1d_int16_to_int16_weight_16bit() - 1-dimensional convolution operation as below:
 *        output[c, l] = (sum(input[i, l * stride + k * dilation - padding_left] * weight[c, i, k]) + bias[c]) >> output_right_shift
 *        where the sum is taken over indices i and k.
 *        Saturation is included, i.e. output elements outside the range [-32768, 32767] are clamped to -32768 or 32767.
 * @param in: where the input, a 2D tensor of size [in_channel, in_length], is stored
 *            input is arranged in the form below; as an example, with in_channel = 14 and in_length = 10:
 *            input[0, 0], input[1, 0], input[2, 0], ..., input[13, 0], 0, 0,
 *            input[0, 1], input[1, 1], ..., input[13, 1], 0, 0, ...,
 *            input[0, 9], ..., input[13, 9], 0, 0
 *            zeros are padded so that the distance between input[i, j] and input[i, j + 1] is ceil(in_channel / 8.0) * 16 bytes
 * @param out: where the output, a 2D tensor of size [out_channel, out_length], will be written; it is arranged in the same form as the input,
 *             where out_length = 1 + floor((in_length + padding_left + padding_right - (kernel_length - 1) * dilation - 1) / stride)
 * @param weight: where the weight is stored.
 *                If in_channel, out_channel, and group are all equal, weight is a 3D tensor of size [out_channel, 1, kernel_length].
 *                It should be arranged in the form below; as an example, with kernel_length = 4, in_channel = 14, out_channel = 14:
 *                w[0, 0, 0], w[1, 0, 0], ..., w[7, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[7, 0, 1], ..., w[0, 0, 3], ..., w[7, 0, 3],
 *                w[8, 0, 0], ..., w[13, 0, 0], 0, 0, w[8, 0, 1], ..., w[13, 0, 1], 0, 0, ..., w[8, 0, 3], ..., w[13, 0, 3], 0, 0
 *                zeros are padded so that the distance between w[i, 0, k] and w[i, 0, k + 1] is always 16 bytes.
 *                Otherwise, weight is a 3D tensor of size [out_channel, in_channel / group, kernel_length];
 *                it can be divided into groups, and within each group it is a 3D tensor of size [out_channel / group, in_channel / group, kernel_length].
 *                weight should be arranged in the form below; as an example, with kernel_length = 4, in_channel = 4, out_channel = 28, group = 2
 *                (note that in_channel / group = 2 and out_channel / group = 14, so output channels 0..13 form the 1st group and output channels 14..27 form the 2nd group):
 *                w[0, 0, 0], w[1, 0, 0], ..., w[7, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[7, 0, 1], ..., w[0, 0, 3], ..., w[7, 0, 3],
 *                w[0, 1, 0], w[1, 1, 0], ..., w[7, 1, 0], w[0, 1, 1], w[1, 1, 1], ..., w[7, 1, 1], ..., w[0, 1, 3], ..., w[7, 1, 3],
 *                w[8, 0, 0], ..., w[13, 0, 0], 0, 0, w[8, 0, 1], ..., w[13, 0, 1], 0, 0, ..., w[8, 0, 3], ..., w[13, 0, 3], 0, 0
 *                w[8, 1, 0], ..., w[13, 1, 0], 0, 0, w[8, 1, 1], ..., w[13, 1, 1], 0, 0, ..., w[8, 1, 3], ..., w[13, 1, 3], 0, 0
 *                w[14, 0, 0], w[15, 0, 0], ..., w[21, 0, 0], w[14, 0, 1], w[15, 0, 1], ..., w[21, 0, 1], ..., w[14, 0, 3], ..., w[21, 0, 3],
 *                w[14, 1, 0], w[15, 1, 0], ..., w[21, 1, 0], w[14, 1, 1], w[15, 1, 1], ..., w[21, 1, 1], ..., w[14, 1, 3], ..., w[21, 1, 3],
 *                w[22, 0, 0], ..., w[27, 0, 0], 0, 0, w[22, 0, 1], ..., w[27, 0, 1], 0, 0, ..., w[22, 0, 3], ..., w[27, 0, 3], 0, 0
 *                w[22, 1, 0], ..., w[27, 1, 0], 0, 0, w[22, 1, 1], ..., w[27, 1, 1], 0, 0, ..., w[22, 1, 3], ..., w[27, 1, 3], 0, 0
 *                zeros are padded here to guarantee that the distance in memory between w[i, j, k] and w[i, j, k + 1] is 16 bytes
 * @param bias: where the bias is stored; if no bias is needed, this pointer should be set to NULL
 * @param bean: pointer to the config struct described at the beginning of this file
 * @param output_right_shift: the right-shift amount shown in the description of this function
 * @return 0 -- success
 * @return 1 -- in_channel is 0
 * @return 2 -- out_channel is 0
 * @return 3 -- in_length is 0
 * @return 4 -- kernel_length is 0
 * @return 5 -- stride is 0
 * @return 6 -- dilation is 0
 * @return 7 -- group is 0
 * @return 8 -- in_channel % group is not 0
 * @return 9 -- out_channel % group is not 0
 * @return 10 -- the computed out_length is not a positive number
 */
uint8_t conv1d_int16_to_int16_weight_16bit(int16_t *in, int16_t *out,
    int16_t *weight, int64_t *bias, struct conv1d_config_bean *bean,
    uint8_t output_right_shift);
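
/*
 * Usage sketch (added illustration; buffer sizes are assumptions derived from the
 * padded layouts above): in the 16-bit variant each padded row holds 8 int16
 * elements, so channels are padded to a multiple of 8. Example: a depthwise-style
 * call with in_channel = out_channel = group = 14, kernel_length = 4, in_length = 10.
 *
 *     enum { C16 = 14, L16 = 10, K16 = 4,
 *            C16_PAD = ((C16 + 7) / 8) * 8 }; // channels padded to a multiple of 8
 *
 *     static int16_t conv16_in[C16_PAD * L16];
 *     static int16_t conv16_out[C16_PAD * L16];
 *     static int16_t conv16_weight[C16_PAD * K16];
 *     static int64_t conv16_bias[C16]; // or NULL when no bias is needed
 *
 *     struct conv1d_config_bean bean16 = {0};
 *     bean16.in_channel = C16;  bean16.in_length = L16;
 *     bean16.out_channel = C16; bean16.kernel_length = K16;
 *     bean16.stride = 1; bean16.dilation = 1; bean16.group = C16;
 *
 *     uint8_t err = conv1d_int16_to_int16_weight_16bit(conv16_in, conv16_out,
 *         conv16_weight, conv16_bias, &bean16, 12); // example output_right_shift
 */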

/** @brief conv1d_int16_to_int16_weight_16bit_bias_shift() - 1-dimensional convolution operation as below:
 *        output[c, l] = (sum(input[i, l * stride + k * dilation - padding_left] * weight[c, i, k]) + (bias[c] << bias_left_shift)) >> output_right_shift
 *        where the sum is taken over indices i and k.
 *        Saturation is included, i.e. output elements outside the range [-32768, 32767] are clamped to -32768 or 32767.
 * @param in: where the input, a 2D tensor of size [in_channel, in_length], is stored
 *            input is arranged in the form below; as an example, with in_channel = 14 and in_length = 10:
 *            input[0, 0], input[1, 0], input[2, 0], ..., input[13, 0], 0, 0,
 *            input[0, 1], input[1, 1], ..., input[13, 1], 0, 0, ...,
 *            input[0, 9], ..., input[13, 9], 0, 0
 *            zeros are padded so that the distance between input[i, j] and input[i, j + 1] is ceil(in_channel / 8.0) * 16 bytes
 * @param out: where the output, a 2D tensor of size [out_channel, out_length], will be written; it is arranged in the same form as the input,
 *             where out_length = 1 + floor((in_length + padding_left + padding_right - (kernel_length - 1) * dilation - 1) / stride)
 * @param weight: where the weight is stored.
 *                If in_channel, out_channel, and group are all equal, weight is a 3D tensor of size [out_channel, 1, kernel_length].
 *                It should be arranged in the form below; as an example, with kernel_length = 4, in_channel = 14, out_channel = 14:
 *                w[0, 0, 0], w[1, 0, 0], ..., w[7, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[7, 0, 1], ..., w[0, 0, 3], ..., w[7, 0, 3],
 *                w[8, 0, 0], ..., w[13, 0, 0], 0, 0, w[8, 0, 1], ..., w[13, 0, 1], 0, 0, ..., w[8, 0, 3], ..., w[13, 0, 3], 0, 0
 *                zeros are padded so that the distance between w[i, 0, k] and w[i, 0, k + 1] is always 16 bytes.
 *                Otherwise, weight is a 3D tensor of size [out_channel, in_channel / group, kernel_length];
 *                it can be divided into groups, and within each group it is a 3D tensor of size [out_channel / group, in_channel / group, kernel_length].
 *                weight should be arranged in the form below; as an example, with kernel_length = 4, in_channel = 4, out_channel = 28, group = 2
 *                (note that in_channel / group = 2 and out_channel / group = 14, so output channels 0..13 form the 1st group and output channels 14..27 form the 2nd group):
 *                w[0, 0, 0], w[1, 0, 0], ..., w[7, 0, 0], w[0, 0, 1], w[1, 0, 1], ..., w[7, 0, 1], ..., w[0, 0, 3], ..., w[7, 0, 3],
 *                w[0, 1, 0], w[1, 1, 0], ..., w[7, 1, 0], w[0, 1, 1], w[1, 1, 1], ..., w[7, 1, 1], ..., w[0, 1, 3], ..., w[7, 1, 3],
 *                w[8, 0, 0], ..., w[13, 0, 0], 0, 0, w[8, 0, 1], ..., w[13, 0, 1], 0, 0, ..., w[8, 0, 3], ..., w[13, 0, 3], 0, 0
 *                w[8, 1, 0], ..., w[13, 1, 0], 0, 0, w[8, 1, 1], ..., w[13, 1, 1], 0, 0, ..., w[8, 1, 3], ..., w[13, 1, 3], 0, 0
 *                w[14, 0, 0], w[15, 0, 0], ..., w[21, 0, 0], w[14, 0, 1], w[15, 0, 1], ..., w[21, 0, 1], ..., w[14, 0, 3], ..., w[21, 0, 3],
 *                w[14, 1, 0], w[15, 1, 0], ..., w[21, 1, 0], w[14, 1, 1], w[15, 1, 1], ..., w[21, 1, 1], ..., w[14, 1, 3], ..., w[21, 1, 3],
 *                w[22, 0, 0], ..., w[27, 0, 0], 0, 0, w[22, 0, 1], ..., w[27, 0, 1], 0, 0, ..., w[22, 0, 3], ..., w[27, 0, 3], 0, 0
 *                w[22, 1, 0], ..., w[27, 1, 0], 0, 0, w[22, 1, 1], ..., w[27, 1, 1], 0, 0, ..., w[22, 1, 3], ..., w[27, 1, 3], 0, 0
 *                zeros are padded here to guarantee that the distance in memory between w[i, j, k] and w[i, j, k + 1] is 16 bytes
 * @param bias: where the bias is stored; if no bias is needed, this pointer should be set to NULL
 * @param bean: pointer to the config struct described at the beginning of this file
 * @param output_right_shift: the right-shift amount shown in the description of this function
 * @param bias_left_shift: the left-shift amount shown in the description of this function
 * @return 0 -- success
 * @return 1 -- in_channel is 0
 * @return 2 -- out_channel is 0
 * @return 3 -- in_length is 0
 * @return 4 -- kernel_length is 0
 * @return 5 -- stride is 0
 * @return 6 -- dilation is 0
 * @return 7 -- group is 0
 * @return 8 -- in_channel % group is not 0
 * @return 9 -- out_channel % group is not 0
 * @return 10 -- the computed out_length is not a positive number
 */
uint8_t conv1d_int16_to_int16_weight_16bit_bias_shift(int16_t *in, int16_t *out,
    int16_t *weight, int16_t *bias, struct conv1d_config_bean *bean,
    uint8_t output_right_shift, uint8_t bias_left_shift);
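
/*
 * Usage sketch (added illustration): same call pattern as conv1d_int16_to_int16_weight_16bit
 * in the sketch above, except the bias is stored as int16_t and is left-shifted by
 * bias_left_shift before being added. The shift amounts are example values only.
 *
 *     static int16_t conv16_bias16[C16];
 *
 *     uint8_t err = conv1d_int16_to_int16_weight_16bit_bias_shift(conv16_in, conv16_out,
 *         conv16_weight, conv16_bias16, &bean16,
 *         12, // output_right_shift
 *         6); // bias_left_shift
 */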

/**
  * @}
  */

#ifdef __cplusplus
}
#endif

#endif