P4 DMA is somewhat working, but there is still an issue with buffers that do not occupy a whole cache line

This commit is contained in:
hathach
2024-11-20 18:03:42 +07:00
parent 43a45f29cd
commit 4da5de707b
6 changed files with 104 additions and 18 deletions

View File

@@ -15,6 +15,13 @@ list(APPEND compile_definitions
BOARD_TUH_MAX_SPEED=${RHPORT_HOST_SPEED} BOARD_TUH_MAX_SPEED=${RHPORT_HOST_SPEED}
) )
# ESP32-P4 only: change the TinyUSB buffer alignment to 64 bytes (the DCache
# line size) for a possible DMA configuration.
# Both operands are quoted so if() performs a plain string comparison instead
# of implicitly dereferencing bare words as variable names.
if ("${target}" STREQUAL "esp32p4")
  list(APPEND compile_definitions
    # Quoted argument: parentheses are literal, so no backslash escaping is needed.
    "CFG_TUSB_MEM_ALIGN=__attribute__((aligned(64)))"
    )
endif ()
list(APPEND srcs list(APPEND srcs
# common # common
${tusb_src}/tusb.c ${tusb_src}/tusb.c
@@ -68,6 +75,7 @@ endif()
idf_component_register(SRCS ${srcs} idf_component_register(SRCS ${srcs}
INCLUDE_DIRS ${tusb_src} INCLUDE_DIRS ${tusb_src}
REQUIRES src REQUIRES src
PRIV_REQUIRES esp_mm
) )
target_compile_definitions(${COMPONENT_LIB} PUBLIC ${compile_definitions}) target_compile_definitions(${COMPONENT_LIB} PUBLIC ${compile_definitions})

View File

@@ -93,15 +93,15 @@ typedef struct TU_ATTR_ALIGNED(4) {
// clean/flush data cache: write cache -> memory. // clean/flush data cache: write cache -> memory.
// Required before an DMA TX transfer to make sure data is in memory // Required before an DMA TX transfer to make sure data is in memory
void dcd_dcache_clean(void const* addr, uint32_t data_size) TU_ATTR_WEAK; void dcd_dcache_clean(const void* addr, uint32_t data_size);
// invalidate data cache: mark cache as invalid, next read will read from memory // invalidate data cache: mark cache as invalid, next read will read from memory
// Required BOTH before and after an DMA RX transfer // Required BOTH before and after an DMA RX transfer
void dcd_dcache_invalidate(void const* addr, uint32_t data_size) TU_ATTR_WEAK; void dcd_dcache_invalidate(const void* addr, uint32_t data_size);
// clean and invalidate data cache // clean and invalidate data cache
// Required before an DMA transfer where memory is both read/write by DMA // Required before an DMA transfer where memory is both read/write by DMA
void dcd_dcache_clean_invalidate(void const* addr, uint32_t data_size) TU_ATTR_WEAK; void dcd_dcache_clean_invalidate(const void* addr, uint32_t data_size);
//--------------------------------------------------------------------+ //--------------------------------------------------------------------+
// Controller API // Controller API

View File

@@ -46,9 +46,7 @@
// Weak stubs: invoked if no strong implementation is available // Weak stubs: invoked if no strong implementation is available
//--------------------------------------------------------------------+ //--------------------------------------------------------------------+
TU_ATTR_WEAK void tud_event_hook_cb(uint8_t rhport, uint32_t eventid, bool in_isr) { TU_ATTR_WEAK void tud_event_hook_cb(uint8_t rhport, uint32_t eventid, bool in_isr) {
(void) rhport; (void) rhport; (void) eventid; (void) in_isr;
(void) eventid;
(void) in_isr;
} }
TU_ATTR_WEAK void tud_sof_cb(uint32_t frame_count) { TU_ATTR_WEAK void tud_sof_cb(uint32_t frame_count) {
@@ -82,9 +80,7 @@ TU_ATTR_WEAK void tud_resume_cb(void) {
} }
TU_ATTR_WEAK bool tud_vendor_control_xfer_cb(uint8_t rhport, uint8_t stage, tusb_control_request_t const* request) { TU_ATTR_WEAK bool tud_vendor_control_xfer_cb(uint8_t rhport, uint8_t stage, tusb_control_request_t const* request) {
(void) rhport; (void) rhport; (void) stage; (void) request;
(void) stage;
(void) request;
return false; return false;
} }
@@ -101,6 +97,18 @@ TU_ATTR_WEAK void dcd_disconnect(uint8_t rhport) {
(void) rhport; (void) rhport;
} }
// Weak default for dcd_dcache_clean(): a no-op that only silences
// unused-parameter warnings. A strong definition, when linked, takes its place.
TU_ATTR_WEAK void dcd_dcache_clean(const void* addr, uint32_t data_size) {
  (void) addr;
  (void) data_size;
}
// Weak default for dcd_dcache_invalidate(): a no-op that only silences
// unused-parameter warnings. A strong definition, when linked, takes its place.
TU_ATTR_WEAK void dcd_dcache_invalidate(const void* addr, uint32_t data_size) {
  (void) addr;
  (void) data_size;
}
// Weak default for dcd_dcache_clean_invalidate(): a no-op that only silences
// unused-parameter warnings. A strong definition, when linked, takes its place.
TU_ATTR_WEAK void dcd_dcache_clean_invalidate(const void* addr, uint32_t data_size) {
  (void) addr;
  (void) data_size;
}
//--------------------------------------------------------------------+ //--------------------------------------------------------------------+
// Device Data // Device Data
//--------------------------------------------------------------------+ //--------------------------------------------------------------------+

View File

@@ -46,8 +46,7 @@
//--------------------------------------------------------------------+ //--------------------------------------------------------------------+
// MACRO TYPEDEF CONSTANT ENUM // MACRO TYPEDEF CONSTANT ENUM
//--------------------------------------------------------------------+ //--------------------------------------------------------------------+
static CFG_TUD_MEM_SECTION CFG_TUD_MEM_ALIGN uint32_t _setup_packet[2];
static CFG_TUD_MEM_SECTION TU_ATTR_ALIGNED(4) uint32_t _setup_packet[2];
typedef struct { typedef struct {
uint8_t* buffer; uint8_t* buffer;
@@ -73,6 +72,25 @@ static bool _sof_en;
//-------------------------------------------------------------------- //--------------------------------------------------------------------
// DMA // DMA
//-------------------------------------------------------------------- //--------------------------------------------------------------------
#if DWC2_ENABLE_MEM_CACHE
// Driver implementation of dcd_dcache_clean(): forwards to dwc2_dcache_clean().
// A NULL address or a zero size is ignored.
void dcd_dcache_clean(const void* addr, uint32_t data_size) {
  if (!addr || !data_size) {
    return; // nothing to sync
  }
  dwc2_dcache_clean(addr, data_size);
}
// Driver implementation of dcd_dcache_invalidate(): forwards to
// dwc2_dcache_invalidate(). A NULL address or a zero size is ignored.
void dcd_dcache_invalidate(const void* addr, uint32_t data_size) {
  if (!addr || !data_size) {
    return; // nothing to sync
  }
  dwc2_dcache_invalidate(addr, data_size);
}
// Driver implementation of dcd_dcache_clean_invalidate(): forwards to
// dwc2_dcache_clean_invalidate(). A NULL address or a zero size is ignored.
void dcd_dcache_clean_invalidate(const void* addr, uint32_t data_size) {
  if (!addr || !data_size) {
    return; // nothing to sync
  }
  dwc2_dcache_clean_invalidate(addr, data_size);
}
#endif
TU_ATTR_ALWAYS_INLINE static inline bool dma_device_enabled(const dwc2_regs_t* dwc2) { TU_ATTR_ALWAYS_INLINE static inline bool dma_device_enabled(const dwc2_regs_t* dwc2) {
(void) dwc2; (void) dwc2;
@@ -180,7 +198,7 @@ static bool dfifo_alloc(uint8_t rhport, uint8_t ep_addr, uint16_t packet_size) {
// Check if free space is available // Check if free space is available
TU_ASSERT(_dfifo_top >= fifo_size + dwc2->grxfsiz); TU_ASSERT(_dfifo_top >= fifo_size + dwc2->grxfsiz);
_dfifo_top -= fifo_size; _dfifo_top -= fifo_size;
TU_LOG(DWC2_DEBUG, " TX FIFO %u: allocated %u words at offset %u\r\n", epnum, fifo_size, _dfifo_top); // TU_LOG(DWC2_DEBUG, " TX FIFO %u: allocated %u words at offset %u\r\n", epnum, fifo_size, _dfifo_top);
// Both TXFD and TXSA are in unit of 32-bit words. // Both TXFD and TXSA are in unit of 32-bit words.
if (epnum == 0) { if (epnum == 0) {
@@ -348,14 +366,18 @@ static void edpt_schedule_packets(uint8_t rhport, const uint8_t epnum, const uin
const bool is_dma = dma_device_enabled(dwc2); const bool is_dma = dma_device_enabled(dwc2);
if(is_dma) { if(is_dma) {
if (dir == TUSB_DIR_IN && total_bytes != 0) {
dcd_dcache_clean(xfer->buffer, total_bytes);
}
dep->diepdma = (uintptr_t) xfer->buffer; dep->diepdma = (uintptr_t) xfer->buffer;
} dep->diepctl = depctl.value; // enable endpoint
} else {
dep->diepctl = depctl.value; // enable endpoint
dep->diepctl = depctl.value; // enable endpoint // Enable tx fifo empty interrupt only if there is data. Note must after depctl enable
if (dir == TUSB_DIR_IN && total_bytes != 0) {
// Slave: enable tx fifo empty interrupt only if there is data. Note must after depctl enable dwc2->diepempmsk |= (1 << epnum);
if (!is_dma && dir == TUSB_DIR_IN && total_bytes != 0) { }
dwc2->diepempmsk |= (1 << epnum);
} }
} }
@@ -847,6 +869,7 @@ static void handle_epout_dma(uint8_t rhport, uint8_t epnum, dwc2_doepint_t doepi
if (doepint_bm.setup_phase_done) { if (doepint_bm.setup_phase_done) {
dma_setup_prepare(rhport); dma_setup_prepare(rhport);
dcd_dcache_invalidate(_setup_packet, 8);
dcd_event_setup_received(rhport, (uint8_t*) _setup_packet, true); dcd_event_setup_received(rhport, (uint8_t*) _setup_packet, true);
return; return;
} }
@@ -873,6 +896,7 @@ static void handle_epout_dma(uint8_t rhport, uint8_t epnum, dwc2_doepint_t doepi
dma_setup_prepare(rhport); dma_setup_prepare(rhport);
} }
dcd_dcache_invalidate(xfer->buffer, xfer->total_len);
dcd_event_xfer_complete(rhport, epnum, xfer->total_len, XFER_RESULT_SUCCESS, true); dcd_event_xfer_complete(rhport, epnum, xfer->total_len, XFER_RESULT_SUCCESS, true);
} }
} }

View File

@@ -229,6 +229,8 @@ bool dwc2_core_init(uint8_t rhport, bool is_highspeed, bool is_dma) {
dwc2->gotgint = 0xFFFFFFFFU; dwc2->gotgint = 0xFFFFFFFFU;
dwc2->gintmsk = 0; dwc2->gintmsk = 0;
TU_LOG(DWC2_COMMON_DEBUG, "DMA = %u\r\n", is_dma);
if (is_dma) { if (is_dma) {
// DMA seems to be only settable after a core reset, and not possible to switch on-the-fly // DMA seems to be only settable after a core reset, and not possible to switch on-the-fly
dwc2->gahbcfg |= GAHBCFG_DMAEN | GAHBCFG_HBSTLEN_2; dwc2->gahbcfg |= GAHBCFG_DMAEN | GAHBCFG_HBSTLEN_2;

View File

@@ -111,6 +111,50 @@ TU_ATTR_ALWAYS_INLINE static inline void dwc2_phy_update(dwc2_regs_t* dwc2, uint
// maybe usb_utmi_hal_disable() // maybe usb_utmi_hal_disable()
} }
//--------------------------------------------------------------------+
// Data Cache
//--------------------------------------------------------------------+
#if defined(SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE) && SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE
#include "sdkconfig.h"
#include "hal/cache_hal.h"
#include "esp_cache.h"
#include "esp_log.h"
#define DWC2_MEM_CACHE_LINE_SIZE CONFIG_CACHE_L1_CACHE_LINE_SIZE
#define DWC2_ENABLE_MEM_CACHE 1
// Round `size` up to the next multiple of the L1 cache line size.
// Values that are already a multiple are returned unchanged.
// The mask arithmetic relies on the line size being a power of two.
TU_ATTR_ALWAYS_INLINE static inline uint32_t round_up_to_cache_line_size(uint32_t size) {
  const uint32_t line_mask = (uint32_t) (CONFIG_CACHE_L1_CACHE_LINE_SIZE - 1);
  return (size + line_mask) & ~line_mask;
}
TU_ATTR_ALWAYS_INLINE static inline void dwc2_dcache_clean(const void* addr, uint32_t data_size) {
// round up to cache line size
const int flag = ESP_CACHE_MSYNC_FLAG_TYPE_DATA | ESP_CACHE_MSYNC_FLAG_DIR_C2M;
data_size = round_up_to_cache_line_size(data_size);
//ESP_EARLY_LOGI("ESP32_DWC", "dcache clean, addr 0x%"PRIx32", size %d (%s)", (uintptr_t)addr, data_size);
assert(ESP_OK == esp_cache_msync((void*)addr, data_size, flag));
}
TU_ATTR_ALWAYS_INLINE static inline void dwc2_dcache_invalidate(const void* addr, uint32_t data_size) {
const int flag = ESP_CACHE_MSYNC_FLAG_TYPE_DATA | ESP_CACHE_MSYNC_FLAG_DIR_M2C;
data_size = round_up_to_cache_line_size(data_size);
//ESP_EARLY_LOGI("ESP32_DWC", "dcache invalidate, addr 0x%"PRIx32", size %d (%s)", (uintptr_t)addr, data_size);
assert(ESP_OK == esp_cache_msync((void*)addr, data_size, flag));
}
TU_ATTR_ALWAYS_INLINE static inline void dwc2_dcache_clean_invalidate(const void* addr, uint32_t data_size) {
const int flag = ESP_CACHE_MSYNC_FLAG_TYPE_DATA | ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_DIR_M2C;
data_size = round_up_to_cache_line_size(data_size);
//ESP_EARLY_LOGI("ESP32_DWC", "dcache clean_invalidate, addr 0x%"PRIx32", size %d (%s)", (uintptr_t)addr, data_size);
assert(ESP_OK == esp_cache_msync((void*)addr, data_size, flag));
}
#endif // SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif