diff --git a/src/amd/common/ac_cmdbuf_cp.c b/src/amd/common/ac_cmdbuf_cp.c index 91dea443376..9ee85b9fca4 100644 --- a/src/amd/common/ac_cmdbuf_cp.c +++ b/src/amd/common/ac_cmdbuf_cp.c @@ -403,20 +403,23 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level, enum amd_ip_type ip_type, uint32_t engine, uint32_t gcr_cntl) { - assert(engine == V_581B_CP_PFP || engine == V_581B_CP_ME); + assert(ip_type != AMD_IP_GFX || (engine == V_581B_CP_PFP || engine == V_581B_CP_ME)); assert(gcr_cntl); ac_cmdbuf_begin(cs); if (gfx_level >= GFX10) { /* ACQUIRE_MEM in PFP is implemented as ACQUIRE_MEM in ME + PFP_SYNC_ME. */ - const uint32_t engine_flag = engine == V_581B_CP_ME ? BITFIELD_BIT(31) : 0; + const uint32_t engine_flag = + ip_type == AMD_IP_GFX && engine == V_581B_CP_ME ? BITFIELD_BIT(31) : 0; + const uint32_t coher_size_hi = + gfx_level >= GFX11 && ip_type == AMD_IP_GFX ? 0xffffff : 0xff; /* Flush caches. This doesn't wait for idle. */ ac_cmdbuf_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0)); ac_cmdbuf_emit(engine_flag); /* which engine to use */ ac_cmdbuf_emit(0xffffffff); /* CP_COHER_SIZE */ - ac_cmdbuf_emit(0x01ffffff); /* CP_COHER_SIZE_HI */ + ac_cmdbuf_emit(coher_size_hi); /* CP_COHER_SIZE_HI */ ac_cmdbuf_emit(0); /* CP_COHER_BASE */ ac_cmdbuf_emit(0); /* CP_COHER_BASE_HI */ ac_cmdbuf_emit(0x0000000A); /* POLL_INTERVAL */ @@ -429,7 +432,7 @@ ac_emit_cp_acquire_mem(struct ac_cmdbuf *cs, enum amd_gfx_level gfx_level, ac_cmdbuf_emit(PKT3(PKT3_ACQUIRE_MEM, 5, 0) | PKT3_SHADER_TYPE_S(is_mec)); ac_cmdbuf_emit(gcr_cntl); /* CP_COHER_CNTL */ ac_cmdbuf_emit(0xffffffff); /* CP_COHER_SIZE */ - ac_cmdbuf_emit(0xffffff); /* CP_COHER_SIZE_HI */ + ac_cmdbuf_emit(0x000000ff); /* CP_COHER_SIZE_HI */ ac_cmdbuf_emit(0); /* CP_COHER_BASE */ ac_cmdbuf_emit(0); /* CP_COHER_BASE_HI */ ac_cmdbuf_emit(0x0000000A); /* POLL_INTERVAL */ diff --git a/src/amd/vulkan/radv_cp_dma.c b/src/amd/vulkan/radv_cp_dma.c index 495c709bd46..19a75341eee 100644 --- a/src/amd/vulkan/radv_cp_dma.c +++ b/src/amd/vulkan/radv_cp_dma.c @@ -63,8 +63,8 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radv_cmd_stream *cs, bool else command |= S_415_BYTE_COUNT(size); - /* Sync flags. */ - if (flags & CP_DMA_SYNC) + /* Sync flags. Only present for PFP/ME. MEC always sync. */ + if ((flags & CP_DMA_SYNC) && cs->hw_ip == AMD_IP_GFX) header |= S_501_CP_SYNC(1); if (flags & CP_DMA_RAW_WAIT) diff --git a/src/amd/vulkan/radv_perfcounter.c b/src/amd/vulkan/radv_perfcounter.c index 20cb76c1178..96ca57e5dff 100644 --- a/src/amd/vulkan/radv_perfcounter.c +++ b/src/amd/vulkan/radv_perfcounter.c @@ -539,16 +539,20 @@ radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *blo static void radv_pc_wait_idle(struct radv_cmd_buffer *cmd_buffer) { + struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_cmd_stream *cs = cmd_buffer->cs; radeon_begin(cs); radeon_event_write(V_028A90_CS_PARTIAL_FLUSH); + const uint32_t coher_size_hi = pdev->info.gfx_level >= GFX11 ? 0xffffff : 0xff; + radeon_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0)); radeon_emit(0); /* CP_COHER_CNTL */ radeon_emit(0xffffffff); /* CP_COHER_SIZE */ - radeon_emit(0xffffff); /* CP_COHER_SIZE_HI */ + radeon_emit(coher_size_hi); /* CP_COHER_SIZE_HI */ radeon_emit(0); /* CP_COHER_BASE */ radeon_emit(0); /* CP_COHER_BASE_HI */ radeon_emit(0x0000000A); /* POLL_INTERVAL */ diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 26c9d230243..c31eaee2e04 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -53,8 +53,8 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, ui else command |= S_415_BYTE_COUNT(size); - /* Sync flags. */ - if (flags & CP_DMA_SYNC) + /* Sync flags. Only present for PFP/ME. MEC always sync. */ + if ((flags & CP_DMA_SYNC) && sctx->is_gfx_queue) header |= S_501_CP_SYNC(1); if (flags & CP_DMA_RAW_WAIT)