radeonsi: submit cs_preamble_state as first job in userqueue

Once the preamble IB has been submitted for a queue, the cs_preamble_state of any other new context will not be submitted again.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35106>
Yogesh Mohan Marimuthu 2025-06-16 18:53:36 +05:30 committed by Marge Bot
parent 0186977988
commit 9fa192ea22
5 changed files with 76 additions and 2 deletions
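
Note: the mechanism boils down to a once-per-queue gate around the preamble submission. Below is a minimal standalone model of that contract, with invented names (once_gate, submit_preamble_once); the real gate uses simple_mtx_t and the is_cs_preamble_ib_sent flag in the amdgpu_userq.c hunks further down:

#include <pthread.h>
#include <stdbool.h>

/* Standalone model of the once-per-queue contract: the first caller pays the
 * cost, every later caller is a no-op. Invented names for illustration only. */
struct once_gate {
   pthread_mutex_t lock;
   bool done;
};

static bool submit_preamble_once(struct once_gate *gate,
                                 bool (*submit)(void *ctx), void *ctx)
{
   bool ok = true;

   pthread_mutex_lock(&gate->lock);
   if (!gate->done) {
      /* Mark done before submitting so a failed attempt is not retried,
       * matching the ordering in the winsys implementation below. */
      gate->done = true;
      ok = submit(ctx);
   }
   pthread_mutex_unlock(&gate->lock);
   return ok;
}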

View file

@@ -14,7 +14,11 @@ bool si_init_cp_reg_shadowing(struct si_context *sctx)
    if (!si_init_gfx_preamble_state(sctx))
       return false;
 
-   if (sctx->uses_kernelq_reg_shadowing) {
+   if (sctx->uses_userq_reg_shadowing) {
+      sctx->ws->userq_submit_cs_preamble_ib_once(&sctx->gfx_cs, &sctx->cs_preamble_state->base);
+      si_pm4_free_state(sctx, sctx->cs_preamble_state, ~0);
+      sctx->cs_preamble_state = NULL;
+   } else if (sctx->uses_kernelq_reg_shadowing) {
       if (sctx->screen->info.has_fw_based_shadowing) {
          sctx->shadowing.registers =
             si_aligned_buffer_create(sctx->b.screen,

View file

@@ -26,6 +26,7 @@
 #include "amd/common/ac_gpu_info.h"
 #include "amd/common/ac_surface.h"
 #include "amd/common/ac_pm4.h"
+#include "pipebuffer/pb_buffer.h"
 
 /* Tiling flags. */
@@ -798,6 +799,12 @@ struct radeon_winsys {
     */
    void (*cs_set_mcbp_reg_shadowing_va)(struct radeon_cmdbuf *rcs, uint64_t regs_va,
                                         uint64_t csa_va);
+
+   /**
+    * Submits the preamble IB, which is the IB that initializes immutable registers and states.
+    * This must be the first IB for that queue type, and it affects all current and future contexts.
+    * If the IB has been submitted already, the call is ignored.
+    */
+   bool (*userq_submit_cs_preamble_ib_once)(struct radeon_cmdbuf *rcs, struct ac_pm4_state *pm4);
 };
 
 static inline bool radeon_emitted(struct radeon_cmdbuf *rcs, unsigned num_dw)
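
A hedged usage sketch for this hook, mirroring the radeonsi hunk above; it uses Mesa-internal types (si_context and friends), so it is illustrative rather than compilable on its own:

/* Sketch: submit the preamble once, then drop the per-context CPU copy,
 * since the winsys now owns a GPU copy of the PM4 stream. */
if (sctx->uses_userq_reg_shadowing) {
   sctx->ws->userq_submit_cs_preamble_ib_once(&sctx->gfx_cs,
                                              &sctx->cs_preamble_state->base);
   si_pm4_free_state(sctx, sctx->cs_preamble_state, ~0);
   sctx->cs_preamble_state = NULL;
}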

View file

@@ -5,7 +5,9 @@
  */
 
 #include "amdgpu_bo.h"
 #include "amdgpu_cs.h"
+#include "ac_linux_drm.h"
+#include "sid.h"
 
 static bool
 amdgpu_userq_ring_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq)
@@ -65,6 +67,7 @@ amdgpu_userq_deinit(struct amdgpu_winsys *aws, struct amdgpu_userq *userq)
    case AMD_IP_GFX:
       radeon_bo_reference(&aws->dummy_sws.base, &userq->gfx_data.csa_bo, NULL);
       radeon_bo_reference(&aws->dummy_sws.base, &userq->gfx_data.shadow_bo, NULL);
+      radeon_bo_reference(&aws->dummy_sws.base, &userq->cs_preamble_ib_bo, NULL);
       break;
    case AMD_IP_COMPUTE:
       radeon_bo_reference(&aws->dummy_sws.base, &userq->compute_data.eop_bo, NULL);
@@ -112,7 +115,8 @@ amdgpu_userq_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq, enum am
       userq->gfx_data.shadow_bo = amdgpu_bo_create(aws, aws->info.fw_based_mcbp.shadow_size,
                                                    aws->info.fw_based_mcbp.shadow_alignment,
                                                    RADEON_DOMAIN_VRAM,
-                                                   RADEON_FLAG_NO_INTERPROCESS_SHARING);
+                                                   RADEON_FLAG_NO_INTERPROCESS_SHARING |
+                                                   RADEON_FLAG_CLEAR_VRAM);
 
       if (!userq->gfx_data.shadow_bo)
          goto fail;
@@ -206,3 +210,56 @@ fail:
    simple_mtx_unlock(&userq->lock);
    return false;
 }
+
+static bool
+amdgpu_userq_submit_cs_preamble_ib_once(struct radeon_cmdbuf *rcs, struct ac_pm4_state *pm4)
+{
+   struct amdgpu_cs *acs = amdgpu_cs(rcs);
+   struct amdgpu_winsys *aws = acs->aws;
+   struct amdgpu_userq *userq = &aws->queues[acs->queue_index].userq;
+   uint64_t *cs_preamble_ib_bo_map;
+
+   simple_mtx_lock(&userq->lock);
+   if (userq->is_cs_preamble_ib_sent) {
+      simple_mtx_unlock(&userq->lock);
+      return true;
+   }
+
+   userq->is_cs_preamble_ib_sent = true;
+
+   assert(userq->ip_type == AMD_IP_GFX);
+   assert(!userq->next_wptr);
+
+   userq->cs_preamble_ib_bo = amdgpu_bo_create(aws, pm4->ndw * 4, 256, RADEON_DOMAIN_GTT,
+                                               RADEON_FLAG_GL2_BYPASS |
+                                               RADEON_FLAG_NO_INTERPROCESS_SHARING);
+   if (!userq->cs_preamble_ib_bo) {
+      simple_mtx_unlock(&userq->lock);
+      return false;
+   }
+
+   cs_preamble_ib_bo_map = amdgpu_bo_map(&aws->dummy_sws.base, userq->cs_preamble_ib_bo,
+                                         NULL, PIPE_MAP_READ | PIPE_MAP_WRITE |
+                                         PIPE_MAP_UNSYNCHRONIZED);
+   if (!cs_preamble_ib_bo_map) {
+      simple_mtx_unlock(&userq->lock);
+      return false;
+   }
+
+   memcpy(cs_preamble_ib_bo_map, &pm4->pm4, pm4->ndw * 4);
+
+   amdgpu_pkt_begin();
+   amdgpu_pkt_add_dw(PKT3(PKT3_INDIRECT_BUFFER, 2, 0));
+   amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->cs_preamble_ib_bo));
+   amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->cs_preamble_ib_bo) >> 32);
+   amdgpu_pkt_add_dw(pm4->ndw | S_3F3_INHERIT_VMID_MQD_GFX(1));
+   amdgpu_pkt_end();
+
+   simple_mtx_unlock(&userq->lock);
+   return true;
+}
+
+void amdgpu_userq_init_functions(struct amdgpu_screen_winsys *sws)
+{
+   sws->base.userq_submit_cs_preamble_ib_once = amdgpu_userq_submit_cs_preamble_ib_once;
+}
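
For reference, a standalone sketch of the 4-dword INDIRECT_BUFFER packet the function above pushes onto the queue ring. The PKT3 header layout follows sid.h (type in bits 31:30, count in 29:16, opcode in 15:8, predicate in bit 0); the INHERIT_VMID_MQD_GFX bit is taken as a caller-supplied flag here because its exact position is not visible in this diff:

#include <stdint.h>

#define PKT3_INDIRECT_BUFFER 0x3F
#define PKT3(op, count, pred) \
   ((3u << 30) | (((uint32_t)(count) & 0x3fff) << 16) | \
    (((uint32_t)(op) & 0xff) << 8) | ((uint32_t)(pred) & 0x1))

/* Encode the packet built by amdgpu_userq_submit_cs_preamble_ib_once():
 * a type-3 header followed by the IB GPU address and its size in dwords. */
static void encode_indirect_buffer(uint32_t out[4], uint64_t ib_va,
                                   uint32_t size_dw, uint32_t inherit_flags)
{
   out[0] = PKT3(PKT3_INDIRECT_BUFFER, 2, 0); /* 3 payload dwords: count = 2 */
   out[1] = (uint32_t)ib_va;                  /* IB GPU VA, low 32 bits */
   out[2] = (uint32_t)(ib_va >> 32);          /* IB GPU VA, high 32 bits */
   out[3] = size_dw | inherit_flags;          /* size | S_3F3_INHERIT_VMID_MQD_GFX(1) */
}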

View file

@@ -33,6 +33,7 @@ extern "C" {
 } while (0)
 
 struct amdgpu_winsys;
+struct amdgpu_screen_winsys;
 
 struct amdgpu_userq_gfx_data {
    struct pb_buffer_lean *csa_bo;
@@ -72,6 +73,8 @@ struct amdgpu_userq {
    struct pb_buffer_lean *doorbell_bo;
    uint64_t *doorbell_bo_map;
+   struct pb_buffer_lean *cs_preamble_ib_bo;
+   bool is_cs_preamble_ib_sent;
 
    uint32_t userq_handle;
    enum amd_ip_type ip_type;
    simple_mtx_t lock;
@@ -91,6 +94,8 @@ amdgpu_userq_init(struct amdgpu_winsys *aws, struct amdgpu_userq *userq, enum am
 void
 amdgpu_userq_deinit(struct amdgpu_winsys *aws, struct amdgpu_userq *userq);
 
+void amdgpu_userq_init_functions(struct amdgpu_screen_winsys *sws);
+
 #ifdef __cplusplus
 }
 #endif

View file

@@ -547,6 +547,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
    amdgpu_bo_init_functions(sws);
    amdgpu_cs_init_functions(sws);
+   amdgpu_userq_init_functions(sws);
    amdgpu_surface_init_functions(sws);
 
    simple_mtx_lock(&aws->sws_list_lock);