From 9fa192ea22025946db64f6097f30bfa5f4a6e41b Mon Sep 17 00:00:00 2001 From: Yogesh Mohan Marimuthu Date: Mon, 16 Jun 2025 18:53:36 +0530 Subject: [PATCH] radeonsi: submit cs_preamble_state as the first job in userqueue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also, any other new context's cs_preamble_state will not be submitted. Reviewed-by: Marek Olšák Part-of: --- .../drivers/radeonsi/si_cp_reg_shadowing.c | 6 +- src/gallium/include/winsys/radeon_winsys.h | 7 +++ src/gallium/winsys/amdgpu/drm/amdgpu_userq.c | 59 ++++++++++++++++++- src/gallium/winsys/amdgpu/drm/amdgpu_userq.h | 5 ++ src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 1 + 5 files changed, 76 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c index 36d88f21f12..d5f2427534f 100644 --- a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c +++ b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c @@ -14,7 +14,11 @@ bool si_init_cp_reg_shadowing(struct si_context *sctx) if (!si_init_gfx_preamble_state(sctx)) return false; - if (sctx->uses_kernelq_reg_shadowing) { + if (sctx->uses_userq_reg_shadowing) { + sctx->ws->userq_submit_cs_preamble_ib_once(&sctx->gfx_cs, &sctx->cs_preamble_state->base); + si_pm4_free_state(sctx, sctx->cs_preamble_state, ~0); + sctx->cs_preamble_state = NULL; + } else if (sctx->uses_kernelq_reg_shadowing) { if (sctx->screen->info.has_fw_based_shadowing) { sctx->shadowing.registers = si_aligned_buffer_create(sctx->b.screen, diff --git a/src/gallium/include/winsys/radeon_winsys.h b/src/gallium/include/winsys/radeon_winsys.h index 70261cf27a5..08fdfb873e7 100644 --- a/src/gallium/include/winsys/radeon_winsys.h +++ b/src/gallium/include/winsys/radeon_winsys.h @@ -26,6 +26,7 @@ #include "amd/common/ac_gpu_info.h" #include "amd/common/ac_surface.h" +#include "amd/common/ac_pm4.h" #include "pipebuffer/pb_buffer.h" /* Tiling flags. 
*/ @@ -798,6 +799,12 @@ struct radeon_winsys { */ void (*cs_set_mcbp_reg_shadowing_va)(struct radeon_cmdbuf *rcs, uint64_t regs_va, uint64_t csa_va); + /** + * Submits the preamble IB, which is the IB that initializes immutable registers and states. + * This must be the first IB for that queue type, and it affects all current and future contexts. + * If the IB has been submitted already, the call is ignored. + */ + bool (*userq_submit_cs_preamble_ib_once)(struct radeon_cmdbuf *rcs, struct ac_pm4_state *pm4); }; static inline bool radeon_emitted(struct radeon_cmdbuf *rcs, unsigned num_dw) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_userq.c b/src/gallium/winsys/amdgpu/drm/amdgpu_userq.c index 5c1d88e1322..a89c0e9cc12 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_userq.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_userq.c @@ -5,7 +5,9 @@ */ #include "amdgpu_bo.h" +#include "amdgpu_cs.h" #include "ac_linux_drm.h" +#include "sid.h" static bool amdgpu_userq_ring_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq) @@ -65,6 +67,7 @@ amdgpu_userq_deinit(struct amdgpu_winsys *aws, struct amdgpu_userq *userq) case AMD_IP_GFX: radeon_bo_reference(&aws->dummy_sws.base, &userq->gfx_data.csa_bo, NULL); radeon_bo_reference(&aws->dummy_sws.base, &userq->gfx_data.shadow_bo, NULL); + radeon_bo_reference(&aws->dummy_sws.base, &userq->cs_preamble_ib_bo, NULL); break; case AMD_IP_COMPUTE: radeon_bo_reference(&aws->dummy_sws.base, &userq->compute_data.eop_bo, NULL); @@ -112,7 +115,8 @@ amdgpu_userq_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq, enum am userq->gfx_data.shadow_bo = amdgpu_bo_create(aws, aws->info.fw_based_mcbp.shadow_size, aws->info.fw_based_mcbp.shadow_alignment, RADEON_DOMAIN_VRAM, - RADEON_FLAG_NO_INTERPROCESS_SHARING); + RADEON_FLAG_NO_INTERPROCESS_SHARING | + RADEON_FLAG_CLEAR_VRAM); if (!userq->gfx_data.shadow_bo) goto fail; @@ -206,3 +210,56 @@ fail: simple_mtx_unlock(&userq->lock); return false; } + +static bool 
+amdgpu_userq_submit_cs_preamble_ib_once(struct radeon_cmdbuf *rcs, struct ac_pm4_state *pm4) +{ + struct amdgpu_cs *acs = amdgpu_cs(rcs); + struct amdgpu_winsys *aws = acs->aws; + struct amdgpu_userq *userq = &aws->queues[acs->queue_index].userq; + uint64_t *cs_preamble_ib_bo_map; + + simple_mtx_lock(&userq->lock); + + if (userq->is_cs_preamble_ib_sent) { + simple_mtx_unlock(&userq->lock); + return true; + } + + userq->is_cs_preamble_ib_sent = true; + assert(userq->ip_type == AMD_IP_GFX); + assert(!userq->next_wptr); + + userq->cs_preamble_ib_bo = amdgpu_bo_create(aws, pm4->ndw * 4, 256, RADEON_DOMAIN_GTT, + RADEON_FLAG_GL2_BYPASS | + RADEON_FLAG_NO_INTERPROCESS_SHARING); + if (!userq->cs_preamble_ib_bo) { + simple_mtx_unlock(&userq->lock); + return false; + } + + cs_preamble_ib_bo_map = amdgpu_bo_map(&aws->dummy_sws.base, userq->cs_preamble_ib_bo, + NULL, PIPE_MAP_READ | PIPE_MAP_WRITE | + PIPE_MAP_UNSYNCHRONIZED); + if (!cs_preamble_ib_bo_map) { + simple_mtx_unlock(&userq->lock); + return false; + } + + memcpy(cs_preamble_ib_bo_map, &pm4->pm4, pm4->ndw * 4); + + amdgpu_pkt_begin(); + amdgpu_pkt_add_dw(PKT3(PKT3_INDIRECT_BUFFER, 2, 0)); + amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->cs_preamble_ib_bo)); + amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->cs_preamble_ib_bo) >> 32); + amdgpu_pkt_add_dw(pm4->ndw | S_3F3_INHERIT_VMID_MQD_GFX(1)); + amdgpu_pkt_end(); + + simple_mtx_unlock(&userq->lock); + return true; +} + +void amdgpu_userq_init_functions(struct amdgpu_screen_winsys *sws) +{ + sws->base.userq_submit_cs_preamble_ib_once = amdgpu_userq_submit_cs_preamble_ib_once; +} diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_userq.h b/src/gallium/winsys/amdgpu/drm/amdgpu_userq.h index 809ee4b5255..60342b6fc0b 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_userq.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_userq.h @@ -33,6 +33,7 @@ extern "C" { } while (0) struct amdgpu_winsys; +struct amdgpu_screen_winsys; struct amdgpu_userq_gfx_data { struct pb_buffer_lean *csa_bo; @@ 
-72,6 +73,8 @@ struct amdgpu_userq { struct pb_buffer_lean *doorbell_bo; uint64_t *doorbell_bo_map; + struct pb_buffer_lean *cs_preamble_ib_bo; + bool is_cs_preamble_ib_sent; uint32_t userq_handle; enum amd_ip_type ip_type; simple_mtx_t lock; @@ -91,6 +94,8 @@ amdgpu_userq_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq, enum am void amdgpu_userq_deinit(struct amdgpu_winsys *aws, struct amdgpu_userq *userq); +void amdgpu_userq_init_functions(struct amdgpu_screen_winsys *sws); + #ifdef __cplusplus } #endif diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index 70bef8c9540..88e25eddc52 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -547,6 +547,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, amdgpu_bo_init_functions(sws); amdgpu_cs_init_functions(sws); + amdgpu_userq_init_functions(sws); amdgpu_surface_init_functions(sws); simple_mtx_lock(&aws->sws_list_lock);