kunlun/export/inc/cnn/cnn_pooling.h

#ifndef CNN_AVG_POOLING
#define CNN_AVG_POOLING
/* os shim includes */
#include "os_types_api.h"

#ifdef __cplusplus
extern "C" {
#endif

/** \defgroup cnn_pool_APIs cnn pool API
  * @brief cnn pool APIs
  * These functions can be used to compute pooling.
  * For any feature, every column occupies a size which is a multiple of 32 bytes. Here "column" means the last dimension of a tensor.
  * For example, if a tensor of type int8_t named A has size 4 * 30 * 300, it is stored in memory in the following form, so that every column occupies 320 bytes:
  * A[0][0][0], A[0][0][1], ..., A[0][0][299], 0, 0, 0, ..., 0, A[0][1][0], A[0][1][1], ..., A[0][1][299], 0, 0, 0, 0, ..., 0, ..., A[0][29][0], ..., A[0][29][299], 0, ..., 0, A[1][0][0], ...
  * Setting out_sep_mode to 1 can accelerate the calculation; however, in this mode the CPU cannot access the output, and the conditions below must be satisfied:
  *     (1) this layer should be either depthwise or pointwise, innerproduct (or fc) could be regarded as pointwise, pooling could be regarded as depthwise
  *     (2) if this layer is pointwise, next layer should be depthwise; if this layer is depthwise, next layer should be pointwise
  *     (3) width of output plus left padding of next layer should be no larger than 16 bytes
  *     (4) stride of next layer should be 1
  *     (5) (height + next_pad_up) * channel of output should be no larger than 8192
  * If out_sep_mode of previous layer is set to 1, in_sep_mode of this layer should be 1
  *
  *
  */

/** @addtogroup cnn_pool_APIs
  * @{
  */


/**
 * @brief Configuration of one pooling operation, passed to cnn_pool_2d().
 *
 * The caller describes the input and selects the pooling options.
 * out_height and out_width do not need to be set by the caller; they are
 * used to store the output size when the calculation finishes.
 */
struct cnn_pooling_config_bean {
    /** Number of input channels. */
    uint16_t in_channel;
    /** Height of the input. */
    uint16_t in_height;
    /** Width of the input. */
    uint16_t in_width;
    /** Does not need to be set; stores the output height when the calculation finishes. */
    uint16_t out_height;
    /** Does not need to be set; stores the output width when the calculation finishes. */
    uint16_t out_width;
    /** Height of the pooling kernel. */
    uint8_t kernel_size_h;
    /** Width of the pooling kernel. */
    uint8_t kernel_size_w;
    /** Pooling stride; cnn_pool_2d() reports an error when it is 0 or too large. */
    uint8_t stride;
    /**
     * Non-zero: the input is padded so that the output size is
     * ceil((input size + 2 * padding) / stride).
     * Zero: the output size is calculated from the rest of this config.
     */
    int8_t padding_to_same;
    /** Zero-padding added to both sides of the input. */
    uint8_t padding;
    /** Whether the input is signed. */
    uint8_t input_signed;
    /** Non-zero: 8-bit mode. Zero: 16-bit mode. */
    uint8_t mac_8bit;
    /** Non-zero: include the zero-padding in the averaging calculation. */
    uint8_t count_include_pad;
    /**
     * Non-zero: use ceil instead of floor to compute the output shape.
     * Only valid when padding_to_same is zero.
     */
    uint8_t ceil_mode;
    /** Non-zero: average pooling. Zero: max pooling. */
    uint8_t avg;
    /**
     * Fraction of the output minus the fraction of the input. A positive
     * value gives a higher precision. Only valid for average pooling.
     */
    uint8_t out_shift;
    /** Non-zero: read the input from iram. Zero: read the input from ddr. */
    uint8_t input_iram;
    /** Non-zero: put the output into iram. Zero: put the output into ddr. */
    uint8_t output_iram;
    /** Whether the input is read from iram in separable conv mode. */
    uint8_t in_sep_mode;
    /** Whether the output is put into iram in separable conv mode. */
    uint8_t out_sep_mode;
};

/** @brief cnn_pool_2d() - do a pooling process using the cnn engine.
 *
 * input_addr and output_addr must both be multiples of 256.
 *
 * @param input_addr where the input is put
 * @param output_addr where the output will be put
 * @param config config info of the pooling process; per the field comments of
 *        struct cnn_pooling_config_bean, out_height and out_width are used to
 *        store the output size when the calculation finishes
 * @return 0 -- succeed
 * @return 1 -- input channel is 0
 * @return 2 -- input height is 0
 * @return 3 -- input width is 0
 * @return 4 -- input height is so small that output height will be 0
 * @return 5 -- input width is so small that output width will be 0
 * @return 7 -- kernel is 0
 * @return 8 -- kernel is too large
 * @return 9 -- stride is 0
 * @return 10 -- stride is too large
 *
 * NOTE(review): return code 6 is not listed above -- confirm against the
 * implementation whether it is unused or simply undocumented.
 */
uint8_t cnn_pool_2d(uint32_t input_addr, uint32_t output_addr, 
    struct cnn_pooling_config_bean *config);

/** @} */ /* closes the "@{" of the @addtogroup cnn_pool_APIs block */

#ifdef __cplusplus
}
#endif

#endif
初始提交 2024-09-28 14:24:04 +08:00			`#ifndef CNN_AVG_POOLING`
			`#define CNN_AVG_POOLING`
			`/* os shim includes */`
			`#include "os_types_api.h"`

			`#ifdef __cplusplus`
			`extern "C" {`
			`#endif`

			`/** \defgroup cnn pool API`
			`* @brief cnn pool APIs`
			`* These functions could be used for calculating pooling`
			`* For any features, every column will occupy a size which is a multiple of 32 bytes. Here column means the last dimension of a tensor`
			`* For example, if a tensor of type int8_t named A is of size 4 * 30 * 300, it will be saved in the memory of this form so that every column occupies 320 bytes:`
			`* A[0][0][0], A[0][0][1], ..., A[0][0][299], 0, 0, 0, ..., 0, A[0][1][0], A[0][1][1], ..., A[0][1][299], 0, 0, 0, 0, ..., 0, ..., A[0][29][0], ..., A[0][29][299], 0, ..., 0, A[1][0][0], ...`
			`* Set out_sep_mode to 1 can accelerate the calculate process, however, in this mode, cpu cannot access the output, and these conditions below must be satisfied:`
			`* (1) this layer should be either depthwise or pointwise, innerproduct (or fc) could be regarded as pointwise, pooling could be regarded as depthwise`
			`* (2) if this layer is pointwise, next layer should be depthwise; if this layer is depthwise, next layer should be pointwise`
			`* (3) width of output plus left padding of next layer should be no larger than 16 bytes`
			`* (4) stride of next layer should be 1`
			`* (5) (height + next_pad_up) * channel of output should be no larger than 8192`
			`* If out_sep_mode of previous layer is set to 1, in_sep_mode of this layer should be 1`
			`*`
			`*`
			`*/`

			`/** @addtogroup cnn_pool_APIs`
			`* @{`
			`*/`


			`struct cnn_pooling_config_bean`
			`{`
			`uint16_t in_channel; // how many channels of input`
			`uint16_t in_height; // height of input`
			`uint16_t in_width; // output of input`
			`uint16_t out_height; // don't need to be setted, this is used to save the height of output when the calculation finishes`
			`uint16_t out_width; // don't need to be setted, this is used to save the height of output when the calculation finishes`
			`uint8_t kernel_size_h; // height of the convolving kernel`
			`uint8_t kernel_size_w; // width of the convolving kernel`
			`uint8_t stride;`
			`int8_t padding_to_same; // non-zero: output size will be ceil((input size + 2 * padding)/stride) by padding input. zero : output size will be calculated by config`
			`uint8_t padding; // zero-padding added to both sides of the input`
			`uint8_t input_signed; // whether input is signed`
			`uint8_t mac_8bit; // non_zero: 8bit mode; zero: 16bit mode`
			`uint8_t count_include_pad; // when non-0, include the zero-padding in the averaging calculation`
			`uint8_t ceil_mode; // when non-0, use ceil instead of floor to compute the output shape. only valid when padding_to_same is zero`
			`uint8_t avg; // non-zero: avg pooling, zero: max pooling`
			`uint8_t out_shift; // the fraction of output minus the fraction of input, you can set this to a positive number for a higher precision, only valid in avgpooling`
			`uint8_t input_iram; // nonzero - read input from iram, 0 - read input from ddr`
			`uint8_t output_iram; // nonzero - put output into iram, 0 - put output into ddr`
			`uint8_t in_sep_mode; // whether read input from iram as separable conv mode`
			`uint8_t out_sep_mode; // whether put output into iram as separable conv mode`
			`};`

			`/* @brief cnn_pool_2d() - do a pooling process using cnn engine, it is necessary that input_addr, output_addr are multiple of 256`
			`* @parem input_addr: where the input is put`
			`* @parem output_addr: where the output will be put`
			`* @parem config: config info of the pooling process`
			`* @return 0 -- succeed`
			`* @return 1 -- input channel is 0`
			`* @return 2 -- input height is 0`
			`* @return 3 -- input width is 0`
			`* @return 4 -- input height is so small that output height will be 0`
			`* @return 5 -- input width is so small that output width will be 0`
			`* @return 7 -- kernel is 0`
			`* @return 8 -- kernel is too large`
			`* @return 9 -- stride is 0`
			`* @return 10 -- stride is too large`
			`*/`
			`uint8_t cnn_pool_2d(uint32_t input_addr, uint32_t output_addr,`
			`struct cnn_pooling_config_bean *config);`

			`#ifdef __cplusplus`
			`}`
			`#endif`

			`#endif`