From 4da5de707b53389c8fe35f6b448f3d1bfe7c91e8 Mon Sep 17 00:00:00 2001 From: hathach Date: Wed, 20 Nov 2024 18:03:42 +0700 Subject: [PATCH] have p4 dma somewhat working but having issue with buffer that does not occupy the whole cache line --- .../components/tinyusb_src/CMakeLists.txt | 8 ++++ src/device/dcd.h | 6 +-- src/device/usbd.c | 20 ++++++--- src/portable/synopsys/dwc2/dcd_dwc2.c | 42 ++++++++++++++---- src/portable/synopsys/dwc2/dwc2_common.c | 2 + src/portable/synopsys/dwc2/dwc2_esp32.h | 44 +++++++++++++++++++ 6 files changed, 104 insertions(+), 18 deletions(-) diff --git a/hw/bsp/espressif/components/tinyusb_src/CMakeLists.txt b/hw/bsp/espressif/components/tinyusb_src/CMakeLists.txt index 900f620fd..9f55c8d5a 100644 --- a/hw/bsp/espressif/components/tinyusb_src/CMakeLists.txt +++ b/hw/bsp/espressif/components/tinyusb_src/CMakeLists.txt @@ -15,6 +15,13 @@ list(APPEND compile_definitions BOARD_TUH_MAX_SPEED=${RHPORT_HOST_SPEED} ) +if (target STREQUAL esp32p4) + # P4 change alignment to 64 (DCache line size) for possible DMA configuration + list(APPEND compile_definitions + CFG_TUSB_MEM_ALIGN=__attribute__\(\(aligned\(64\)\)\) + ) +endif () + list(APPEND srcs # common ${tusb_src}/tusb.c @@ -68,6 +75,7 @@ endif() idf_component_register(SRCS ${srcs} INCLUDE_DIRS ${tusb_src} REQUIRES src + PRIV_REQUIRES esp_mm ) target_compile_definitions(${COMPONENT_LIB} PUBLIC ${compile_definitions}) diff --git a/src/device/dcd.h b/src/device/dcd.h index d01f82e01..0ecdec4ed 100644 --- a/src/device/dcd.h +++ b/src/device/dcd.h @@ -93,15 +93,15 @@ typedef struct TU_ATTR_ALIGNED(4) { // clean/flush data cache: write cache -> memory. 
// Required before an DMA TX transfer to make sure data is in memory -void dcd_dcache_clean(void const* addr, uint32_t data_size) TU_ATTR_WEAK; +void dcd_dcache_clean(const void* addr, uint32_t data_size); // invalidate data cache: mark cache as invalid, next read will read from memory // Required BOTH before and after an DMA RX transfer -void dcd_dcache_invalidate(void const* addr, uint32_t data_size) TU_ATTR_WEAK; +void dcd_dcache_invalidate(const void* addr, uint32_t data_size); // clean and invalidate data cache // Required before an DMA transfer where memory is both read/write by DMA -void dcd_dcache_clean_invalidate(void const* addr, uint32_t data_size) TU_ATTR_WEAK; +void dcd_dcache_clean_invalidate(const void* addr, uint32_t data_size); //--------------------------------------------------------------------+ // Controller API diff --git a/src/device/usbd.c b/src/device/usbd.c index a730b745b..f485b6872 100644 --- a/src/device/usbd.c +++ b/src/device/usbd.c @@ -46,9 +46,7 @@ // Weak stubs: invoked if no strong implementation is available //--------------------------------------------------------------------+ TU_ATTR_WEAK void tud_event_hook_cb(uint8_t rhport, uint32_t eventid, bool in_isr) { - (void) rhport; - (void) eventid; - (void) in_isr; + (void) rhport; (void) eventid; (void) in_isr; } TU_ATTR_WEAK void tud_sof_cb(uint32_t frame_count) { @@ -82,9 +80,7 @@ TU_ATTR_WEAK void tud_resume_cb(void) { } TU_ATTR_WEAK bool tud_vendor_control_xfer_cb(uint8_t rhport, uint8_t stage, tusb_control_request_t const* request) { - (void) rhport; - (void) stage; - (void) request; + (void) rhport; (void) stage; (void) request; return false; } @@ -101,6 +97,18 @@ TU_ATTR_WEAK void dcd_disconnect(uint8_t rhport) { (void) rhport; } +TU_ATTR_WEAK void dcd_dcache_clean(const void* addr, uint32_t data_size) { + (void) addr; (void) data_size; +} + +TU_ATTR_WEAK void dcd_dcache_invalidate(const void* addr, uint32_t data_size) { + (void) addr; (void) data_size; +} + +TU_ATTR_WEAK 
void dcd_dcache_clean_invalidate(const void* addr, uint32_t data_size) { + (void) addr; (void) data_size; +} + //--------------------------------------------------------------------+ // Device Data //--------------------------------------------------------------------+ diff --git a/src/portable/synopsys/dwc2/dcd_dwc2.c b/src/portable/synopsys/dwc2/dcd_dwc2.c index 2b3ef096f..a4ebacfcb 100644 --- a/src/portable/synopsys/dwc2/dcd_dwc2.c +++ b/src/portable/synopsys/dwc2/dcd_dwc2.c @@ -46,8 +46,7 @@ //--------------------------------------------------------------------+ // MACRO TYPEDEF CONSTANT ENUM //--------------------------------------------------------------------+ - -static CFG_TUD_MEM_SECTION TU_ATTR_ALIGNED(4) uint32_t _setup_packet[2]; +static CFG_TUD_MEM_SECTION CFG_TUD_MEM_ALIGN uint32_t _setup_packet[2]; typedef struct { uint8_t* buffer; @@ -73,6 +72,25 @@ static bool _sof_en; //-------------------------------------------------------------------- // DMA //-------------------------------------------------------------------- +#if defined(DWC2_ENABLE_MEM_CACHE) && DWC2_ENABLE_MEM_CACHE // macro is only defined by dwc2_esp32.h on cached SoCs; defined() keeps -Wundef quiet on other ports +void dcd_dcache_clean(const void* addr, uint32_t data_size) { + if (addr && data_size) { + dwc2_dcache_clean(addr, data_size); + } +} + +void dcd_dcache_invalidate(const void* addr, uint32_t data_size) { + if (addr && data_size) { + dwc2_dcache_invalidate(addr, data_size); + } +} + +void dcd_dcache_clean_invalidate(const void* addr, uint32_t data_size) { + if (addr && data_size) { + dwc2_dcache_clean_invalidate(addr, data_size); + } +} +#endif TU_ATTR_ALWAYS_INLINE static inline bool dma_device_enabled(const dwc2_regs_t* dwc2) { (void) dwc2; @@ -180,7 +198,7 @@ static bool dfifo_alloc(uint8_t rhport, uint8_t ep_addr, uint16_t packet_size) { // Check if free space is available TU_ASSERT(_dfifo_top >= fifo_size + dwc2->grxfsiz); _dfifo_top -= fifo_size; - TU_LOG(DWC2_DEBUG, " TX FIFO %u: allocated %u words at offset %u\r\n", epnum, fifo_size, _dfifo_top); + // TU_LOG(DWC2_DEBUG, " TX FIFO %u: allocated %u words 
at offset %u\r\n", epnum, fifo_size, _dfifo_top); // Both TXFD and TXSA are in unit of 32-bit words. if (epnum == 0) { @@ -348,14 +366,18 @@ static void edpt_schedule_packets(uint8_t rhport, const uint8_t epnum, const uin const bool is_dma = dma_device_enabled(dwc2); if(is_dma) { + if (dir == TUSB_DIR_IN && total_bytes != 0) { + dcd_dcache_clean(xfer->buffer, total_bytes); + } dep->diepdma = (uintptr_t) xfer->buffer; - } + dep->diepctl = depctl.value; // enable endpoint + } else { + dep->diepctl = depctl.value; // enable endpoint - dep->diepctl = depctl.value; // enable endpoint - - // Slave: enable tx fifo empty interrupt only if there is data. Note must after depctl enable - if (!is_dma && dir == TUSB_DIR_IN && total_bytes != 0) { - dwc2->diepempmsk |= (1 << epnum); + // Enable tx fifo empty interrupt only if there is data. Note must after depctl enable + if (dir == TUSB_DIR_IN && total_bytes != 0) { + dwc2->diepempmsk |= (1 << epnum); + } } } @@ -847,6 +869,7 @@ static void handle_epout_dma(uint8_t rhport, uint8_t epnum, dwc2_doepint_t doepi if (doepint_bm.setup_phase_done) { dma_setup_prepare(rhport); + dcd_dcache_invalidate(_setup_packet, 8); dcd_event_setup_received(rhport, (uint8_t*) _setup_packet, true); return; } @@ -873,6 +896,7 @@ static void handle_epout_dma(uint8_t rhport, uint8_t epnum, dwc2_doepint_t doepi dma_setup_prepare(rhport); } + dcd_dcache_invalidate(xfer->buffer, xfer->total_len); dcd_event_xfer_complete(rhport, epnum, xfer->total_len, XFER_RESULT_SUCCESS, true); } } diff --git a/src/portable/synopsys/dwc2/dwc2_common.c b/src/portable/synopsys/dwc2/dwc2_common.c index ef155c8f7..f80ae9acb 100644 --- a/src/portable/synopsys/dwc2/dwc2_common.c +++ b/src/portable/synopsys/dwc2/dwc2_common.c @@ -229,6 +229,8 @@ bool dwc2_core_init(uint8_t rhport, bool is_highspeed, bool is_dma) { dwc2->gotgint = 0xFFFFFFFFU; dwc2->gintmsk = 0; + TU_LOG(DWC2_COMMON_DEBUG, "DMA = %u\r\n", is_dma); + if (is_dma) { // DMA seems to be only settable after a core 
reset, and not possible to switch on-the-fly dwc2->gahbcfg |= GAHBCFG_DMAEN | GAHBCFG_HBSTLEN_2; diff --git a/src/portable/synopsys/dwc2/dwc2_esp32.h b/src/portable/synopsys/dwc2/dwc2_esp32.h index 42ab4b80f..24ff80bce 100644 --- a/src/portable/synopsys/dwc2/dwc2_esp32.h +++ b/src/portable/synopsys/dwc2/dwc2_esp32.h @@ -111,6 +111,50 @@ TU_ATTR_ALWAYS_INLINE static inline void dwc2_phy_update(dwc2_regs_t* dwc2, uint // maybe usb_utmi_hal_disable() } +//--------------------------------------------------------------------+ +// Data Cache +//--------------------------------------------------------------------+ +#if defined(SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE) && SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE +#include "sdkconfig.h" +#include "hal/cache_hal.h" +#include "esp_cache.h" +#include "esp_log.h" + +#define DWC2_MEM_CACHE_LINE_SIZE CONFIG_CACHE_L1_CACHE_LINE_SIZE +#define DWC2_ENABLE_MEM_CACHE 1 + +TU_ATTR_ALWAYS_INLINE static inline uint32_t round_up_to_cache_line_size(uint32_t size) { + if (size & (CONFIG_CACHE_L1_CACHE_LINE_SIZE-1)) { + size = (size & ~(CONFIG_CACHE_L1_CACHE_LINE_SIZE-1)) + CONFIG_CACHE_L1_CACHE_LINE_SIZE; + } + return size; +} + +TU_ATTR_ALWAYS_INLINE static inline void dwc2_dcache_clean(const void* addr, uint32_t data_size) { + // round up to cache line size + const int flag = ESP_CACHE_MSYNC_FLAG_TYPE_DATA | ESP_CACHE_MSYNC_FLAG_DIR_C2M; + data_size = round_up_to_cache_line_size(data_size); + //ESP_EARLY_LOGI("ESP32_DWC", "dcache clean, addr 0x%"PRIx32", size %d (%s)", (uintptr_t)addr, data_size); + const esp_err_t err = esp_cache_msync((void*)addr, data_size, flag); assert(ESP_OK == err); (void) err; // sync call kept outside assert() so builds with assertions disabled (NDEBUG) still write the cache back +} + +TU_ATTR_ALWAYS_INLINE static inline void dwc2_dcache_invalidate(const void* addr, uint32_t data_size) { + const int flag = ESP_CACHE_MSYNC_FLAG_TYPE_DATA | ESP_CACHE_MSYNC_FLAG_DIR_M2C; + data_size = round_up_to_cache_line_size(data_size); + //ESP_EARLY_LOGI("ESP32_DWC", "dcache invalidate, addr 0x%"PRIx32", size %d (%s)", (uintptr_t)addr, data_size); + const esp_err_t err = 
esp_cache_msync((void*)addr, data_size, flag); assert(ESP_OK == err); (void) err; // sync call kept outside assert() so builds with assertions disabled (NDEBUG) still invalidate +} + +TU_ATTR_ALWAYS_INLINE static inline void dwc2_dcache_clean_invalidate(const void* addr, uint32_t data_size) { + const int flag = ESP_CACHE_MSYNC_FLAG_TYPE_DATA | ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_INVALIDATE; // write back, then invalidate: esp_cache_msync takes one direction per call, C2M + INVALIDATE is its clean-and-invalidate form + data_size = round_up_to_cache_line_size(data_size); + //ESP_EARLY_LOGI("ESP32_DWC", "dcache clean_invalidate, addr 0x%"PRIx32", size %d (%s)", (uintptr_t)addr, data_size); + const esp_err_t err = esp_cache_msync((void*)addr, data_size, flag); assert(ESP_OK == err); (void) err; // sync call kept outside assert() so builds with assertions disabled (NDEBUG) still run it +} + +#endif // SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE + + #ifdef __cplusplus } #endif