/****************************************************************************

  Copyright(c) 2019 by Aerospace C.Power (Chongqing) Microelectronics. ALL RIGHTS RESERVED.

  This Information is proprietary to Aerospace C.Power (Chongqing) Microelectronics and MAY NOT
  be copied by any method or incorporated into another program without
  the express written consent of Aerospace C.Power. This Information or any portion
  thereof remains the property of Aerospace C.Power. The Information contained herein
  is believed to be accurate and Aerospace C.Power assumes no responsibility or
  liability for its use in any way and conveys no license or title under
  any patent or copyright and makes no representation or warranty that this
  Information is free from patent or copyright infringement.

  ****************************************************************************/
#ifndef CNN_K3D_H
#define CNN_K3D_H
#include "os_types_api.h"

#ifdef __cplusplus
extern "C" {
#endif

/** \defgroup cnn_k3d_APIs cnn k3d API
  * @brief cnn k3d APIs
  * These functions could be used for mid_filter or depth calculation
  * The formula for depth calculation is given below, where x is the input image and Z is the output; f_x_ir, L and d0 are constants independent of x, all expressed in millimeters:
  *     Z = f_x_ir * L / (f_x_ir * L / d0 - x)
  * CNN engine will be used in these functions, except for cpu_prelu_ddr, cpu_prelu_before_depth, cpu_prelu_before_point, cpu_prelu_iram.
  * For any feature, every column occupies a size that is a multiple of 32 bytes. Here column means the last dimension of a tensor.
  * For example, a tensor A of type int16_t and size 4 * 30 * 300 is stored in memory in the following form, so that every column occupies 608 bytes (300 * 2 = 600 bytes of data, zero-padded up to the next multiple of 32):
  * A[0][0][0], A[0][0][1], ..., A[0][0][299], 0, 0, 0, ..., 0, A[0][1][0], A[0][1][1], ..., A[0][1][299], 0, 0, 0, 0, ..., 0, ..., A[0][29][0], ..., A[0][29][299], 0, ..., 0, A[1][0][0], ...
  *
  *
  *
  */

/** @addtogroup cnn_k3d_APIs
  * @{
  */

/* Selects which operation(s) cnn_k3d_driver() performs; stored in cnn_k3d_config_bean.to_do. */
enum cnn_k3d_driver_to_do {
    mid_filter_only = 2, // middle filter only
    mid_filter_and_depth = 3, // middle filter and depth calculation
    depth_only = 4 // depth calculation only
};

/*
 * Configuration passed to cnn_k3d_driver().
 * The caller fills in every field except output_height and output_width,
 * which are filled in after the calculation.
 * Depth calculation uses the formula Z = f_x_ir * L / (f_x_ir * L / d0 - x),
 * where x is the input image and Z is the output.
 */
struct cnn_k3d_config_bean {
    uint32_t input_addr; // address where the input is placed
    uint32_t output_addr; // address where the output will be written
    uint16_t input_height; // height of the input image
    uint16_t input_width; // width of the input image
    uint16_t output_height; // height of the output image; no need to set it, it is filled in after the calculation
    uint16_t output_width; // width of the output image; no need to set it, it is filled in after the calculation
    uint16_t padding_to_same; // 1: use "SAME" mode for padding, 0: use "VALID" mode for padding (see the TensorFlow padding documentation)
    enum cnn_k3d_driver_to_do to_do; // 2: middle filter only, 3: middle filter and depth calculation, 4: depth calculation only
    uint8_t feature_signed; // non-zero: input values are signed, zero: input values are unsigned; only honoured when to_do is not 4, because the input must be signed when to_do is 4
    uint8_t int8_mode; // non-zero: 8bit mode, zero: 16bit mode; only honoured when to_do is 2
    int16_t F_L_d0_mu; // value of f_x_ir * L / d0 in the depth formula, in S(16, 4) form, i.e. int16_t with 4 fraction bits
    uint16_t F_L_mu; // value of f_x_ir * L in the depth formula, in US(16, 1) form, i.e. uint16_t with 1 fraction bit
};

/**
 * @brief cnn_k3d_driver() - do a middle filter and/or depth calculation, as selected by config->to_do; the middle filter's kernel is 3
 * NOTE(review): this brief originally said the work is done "in 16bit mode", but config->int8_mode
 * documents an 8bit mode when to_do is 2 -- confirm which statement is correct.
 * @param config: config info of the middle filter / depth calculation; its output_height and
 *                output_width fields are filled in after the calculation, so it is not const
 * @return 0 -- succeed
 * @return 2 -- input height is 0
 * @return 3 -- input width is 0
 */
uint8_t cnn_k3d_driver(struct cnn_k3d_config_bean *config);


/**
  * @}
  */
  
#ifdef __cplusplus
}
#endif

#endif