winsys/amdgpu: enable userq reg shadowing for gfx11.5
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36700>
parent 700850f29d
commit 3ba6c9d0ac
8 changed files with 136 additions and 13 deletions
@@ -2929,10 +2929,8 @@ void ac_print_nonshadowed_regs(enum amd_gfx_level gfx_level, enum radeon_family
    }
 }
 
-static void ac_build_load_reg(const struct radeon_info *info,
-                              struct ac_pm4_state *pm4,
-                              enum ac_reg_range_type type,
-                              uint64_t gpu_address)
+void ac_build_load_reg(const struct radeon_info *info, struct ac_pm4_state *pm4,
+                       enum ac_reg_range_type type, uint64_t gpu_address)
 {
    unsigned packet, num_ranges, offset;
    const struct ac_reg_range *ranges;
@@ -32,6 +32,8 @@ void ac_get_reg_ranges(enum amd_gfx_level gfx_level, enum radeon_family family,
                        const struct ac_reg_range **ranges);
 struct ac_pm4_state *ac_emulate_clear_state(const struct radeon_info *info);
 void ac_print_nonshadowed_regs(enum amd_gfx_level gfx_level, enum radeon_family family);
+void ac_build_load_reg(const struct radeon_info *info, struct ac_pm4_state *pm4,
+                       enum ac_reg_range_type type, uint64_t gpu_address);
 
 struct ac_pm4_state *ac_create_shadowing_ib_preamble(const struct radeon_info *info,
                                                      uint64_t gpu_address,
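For orientation: ac_build_load_reg(), exported above, walks the ranges from ac_get_reg_ranges() and emits one PM4 LOAD_* packet per shadowed register range. Below is a minimal sketch of that per-range encoding, assuming the generic PM4 LOAD layout of base-address lo/hi, register offset in dwords, and dword count; the SKETCH_* macro names and the opcode value are illustrative assumptions, not taken from Mesa headers.

#include <stdint.h>

/* Hypothetical PKT3 header encoding: type 3, opcode, count = payload dwords - 1. */
#define SKETCH_PKT3(op, cnt)   (0xC0000000u | ((uint32_t)(cnt) << 16) | ((uint32_t)(op) << 8))
#define SKETCH_LOAD_SH_REG_OP  0x5F /* assumed opcode value, for illustration only */

static unsigned sketch_emit_load_range(uint32_t *out, uint64_t shadow_va,
                                       uint32_t reg_dw_offset, uint32_t count_dw)
{
   unsigned n = 0;
   out[n++] = SKETCH_PKT3(SKETCH_LOAD_SH_REG_OP, 3); /* header + 4 payload dwords */
   out[n++] = (uint32_t)shadow_va;                   /* shadow buffer base, low 32 bits */
   out[n++] = (uint32_t)(shadow_va >> 32);           /* shadow buffer base, high 32 bits */
   out[n++] = reg_dw_offset;                         /* first register, dword offset */
   out[n++] = count_dw;                              /* number of consecutive registers */
   return n;                                         /* 5 dwords emitted per range */
}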
@@ -15,6 +15,36 @@ bool si_init_cp_reg_shadowing(struct si_context *sctx)
       return false;
 
    if (sctx->uses_userq_reg_shadowing) {
+      /* On GFX11_5, the shadow_va passed to ac_drm_create_userqueue() is not used by the
+       * firmware. Instead, the register shadowing addresses have to be initialized with
+       * LOAD_* packets, and the LOAD_* packets plus the CONTEXT_CONTROL packet that enables
+       * register shadowing have to be submitted for every job.
+       */
+      if (sctx->gfx_level == GFX11_5) {
+         struct ac_pm4_state *shadowing_pm4 = ac_pm4_create_sized(&sctx->screen->info, false, 1024,
+                                                                  sctx->is_gfx_queue);
+         if (!shadowing_pm4) {
+            mesa_loge("failed to allocate memory for shadowing_pm4");
+            return false;
+         }
+
+         ac_pm4_cmd_add(shadowing_pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+         ac_pm4_cmd_add(shadowing_pm4, CC0_UPDATE_LOAD_ENABLES(1) |
+                        CC0_LOAD_PER_CONTEXT_STATE(1) | CC0_LOAD_CS_SH_REGS(1) |
+                        CC0_LOAD_GFX_SH_REGS(1) | CC0_LOAD_GLOBAL_UCONFIG(1));
+         ac_pm4_cmd_add(shadowing_pm4, CC1_UPDATE_SHADOW_ENABLES(1) |
+                        CC1_SHADOW_PER_CONTEXT_STATE(1) | CC1_SHADOW_CS_SH_REGS(1) |
+                        CC1_SHADOW_GFX_SH_REGS(1) | CC1_SHADOW_GLOBAL_UCONFIG(1) |
+                        CC1_SHADOW_GLOBAL_CONFIG(1));
+
+         for (unsigned i = 0; i < SI_NUM_REG_RANGES; i++)
+            ac_build_load_reg(&sctx->screen->info, shadowing_pm4, i,
+                              sctx->ws->userq_f32_get_shadow_regs_va(&sctx->gfx_cs));
+
+         sctx->ws->userq_f32_init_reg_shadowing(&sctx->gfx_cs, shadowing_pm4);
+         ac_pm4_free_state(shadowing_pm4);
+      }
+
       sctx->ws->userq_submit_cs_preamble_ib_once(&sctx->gfx_cs, &sctx->cs_preamble_state->base);
       si_pm4_free_state(sctx, sctx->cs_preamble_state, ~0);
       sctx->cs_preamble_state = NULL;
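Schematically, the shadowing_pm4 stream built here contains:

/* shadowing_pm4 on GFX11_5 (schematic, derived from the hunk above):
 *
 *   CONTEXT_CONTROL          enable loading + shadowing of SH/context/uconfig state
 *   LOAD_* packets           one group per register range from ac_build_load_reg(),
 *                            all reading from userq_f32_get_shadow_regs_va(&sctx->gfx_cs)
 *
 * The winsys copies this stream once into a per-queue IB and chains it in front of
 * every job (see the amdgpu_cs_add_userq_packets hunk below).
 */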
@@ -5119,13 +5119,18 @@ static bool gfx10_init_gfx_preamble_state(struct si_context *sctx)
    }
 
    if (sctx->uses_userq_reg_shadowing) {
-      ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
-      ac_pm4_cmd_add(&pm4->base, CC0_UPDATE_LOAD_ENABLES(1) | CC0_LOAD_PER_CONTEXT_STATE(1) |
-                     CC0_LOAD_CS_SH_REGS(1) | CC0_LOAD_GFX_SH_REGS(1) |
-                     CC0_LOAD_GLOBAL_UCONFIG(1));
-      ac_pm4_cmd_add(&pm4->base, CC1_UPDATE_SHADOW_ENABLES(1) | CC1_SHADOW_PER_CONTEXT_STATE(1) |
-                     CC1_SHADOW_CS_SH_REGS(1) | CC1_SHADOW_GFX_SH_REGS(1) |
-                     CC1_SHADOW_GLOBAL_UCONFIG(1) | CC1_SHADOW_GLOBAL_CONFIG(1));
+      /* On GFX11_5, the CONTEXT_CONTROL packet is added in si_init_cp_reg_shadowing()
+       * instead.
+       */
+      if (sctx->gfx_level != GFX11_5) {
+         ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+         ac_pm4_cmd_add(&pm4->base, CC0_UPDATE_LOAD_ENABLES(1) | CC0_LOAD_PER_CONTEXT_STATE(1) |
+                        CC0_LOAD_CS_SH_REGS(1) | CC0_LOAD_GFX_SH_REGS(1) |
+                        CC0_LOAD_GLOBAL_UCONFIG(1));
+         ac_pm4_cmd_add(&pm4->base, CC1_UPDATE_SHADOW_ENABLES(1) | CC1_SHADOW_PER_CONTEXT_STATE(1) |
+                        CC1_SHADOW_CS_SH_REGS(1) | CC1_SHADOW_GFX_SH_REGS(1) |
+                        CC1_SHADOW_GLOBAL_UCONFIG(1) | CC1_SHADOW_GLOBAL_CONFIG(1));
+      }
    } else if (sctx->is_gfx_queue && !sctx->uses_kernelq_reg_shadowing) {
       ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
       ac_pm4_cmd_add(&pm4->base, CC0_UPDATE_LOAD_ENABLES(1));
@@ -806,6 +806,21 @@ struct radeon_winsys {
     * be combined as a gang submission to GPU.
     */
    bool (*cs_create_compute_gang)(struct radeon_cmdbuf *rcs);
+
+   /**
+    * On gfx11.5, enabling register shadowing and programming the shadow register
+    * addresses has to be done with CONTEXT_CONTROL and LOAD_* packets, and these
+    * packets have to be submitted for every job.
+    */
+   bool (*userq_f32_init_reg_shadowing)(struct radeon_cmdbuf *rcs, struct ac_pm4_state *pm4);
+
+   /**
+    * Gets the shadow regs VA from the given radeon_cmdbuf. The radeon_cmdbuf will be the
+    * gfx_cs, which is per context. With user queues, the shadow regs VA is per user queue:
+    * the gfx_cs is tied to a user queue, and the VA returned is the one belonging to
+    * that user queue.
+    */
+   uint64_t (*userq_f32_get_shadow_regs_va)(struct radeon_cmdbuf *rcs);
 };
 
 static inline bool radeon_emitted(struct radeon_cmdbuf *rcs, unsigned num_dw)
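Condensed, a driver is expected to drive these two hooks the way si_init_cp_reg_shadowing() does above. In this sketch, info, ws and cs stand in for the screen info, winsys and gfx command buffer; error handling and the full CC0_*/CC1_* masks are elided.

/* Build the CONTEXT_CONTROL + LOAD_* stream once, then hand it to the winsys. */
struct ac_pm4_state *pm4 = ac_pm4_create_sized(info, false, 1024, true);

ac_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
ac_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1)   /* | ...load bits as above */);
ac_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1) /* | ...shadow bits as above */);

/* One LOAD_* group per register range, all reading from the per-queue shadow VA. */
for (unsigned i = 0; i < SI_NUM_REG_RANGES; i++)
   ac_build_load_reg(info, pm4, i, ws->userq_f32_get_shadow_regs_va(cs));

ws->userq_f32_init_reg_shadowing(cs, pm4); /* winsys keeps a per-queue copy in an IB */
ac_pm4_free_state(pm4);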
@@ -1476,6 +1476,13 @@ static void amdgpu_cs_add_userq_packets(struct amdgpu_winsys *aws,
    amdgpu_pkt_add_dw(PKT3(PKT3_HDP_FLUSH, 0, 0));
    amdgpu_pkt_add_dw(0x0);
 
+   if (userq->f32_shadowing_ib_bo) {
+      amdgpu_pkt_add_dw(PKT3(PKT3_INDIRECT_BUFFER, 2, 0));
+      amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->f32_shadowing_ib_bo));
+      amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->f32_shadowing_ib_bo) >> 32);
+      amdgpu_pkt_add_dw(userq->f32_shadowing_ib_pm4_dw | S_3F3_INHERIT_VMID_MQD_GFX(1));
+   }
+
    amdgpu_pkt_add_dw(PKT3(PKT3_INDIRECT_BUFFER, 2, 0));
    amdgpu_pkt_add_dw(csc->chunk_ib[IB_MAIN].va_start);
    amdgpu_pkt_add_dw(csc->chunk_ib[IB_MAIN].va_start >> 32);
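The per-submit stream on a gfx11.5 user queue therefore becomes:

/* Per-submit packet stream on a gfx11.5 user queue (sketch):
 *
 *   HDP_FLUSH
 *   INDIRECT_BUFFER -> f32_shadowing_ib_bo   CONTEXT_CONTROL + LOAD_* packets
 *   INDIRECT_BUFFER -> IB_MAIN               the application command stream
 *
 * so the shadowed registers are reloaded before every job, as gfx11.5 requires.
 * S_3F3_INHERIT_VMID_MQD_GFX presumably makes the chained IB execute with the
 * VMID from the queue's MQD; that reading of the field name is an assumption.
 */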
@@ -87,6 +87,7 @@ amdgpu_userq_deinit(struct amdgpu_winsys *aws, struct amdgpu_userq *userq)
       radeon_bo_reference(&aws->dummy_sws.base, &userq->gfx_data.csa_bo, NULL);
       radeon_bo_reference(&aws->dummy_sws.base, &userq->gfx_data.shadow_bo, NULL);
       radeon_bo_reference(&aws->dummy_sws.base, &userq->cs_preamble_ib_bo, NULL);
+      radeon_bo_reference(&aws->dummy_sws.base, &userq->f32_shadowing_ib_bo, NULL);
       break;
    case AMD_IP_COMPUTE:
       radeon_bo_reference(&aws->dummy_sws.base, &userq->compute_data.eop_bo, NULL);
@@ -237,7 +238,7 @@ amdgpu_userq_submit_cs_preamble_ib_once(struct radeon_cmdbuf *rcs, struct ac_pm4
    struct amdgpu_cs *acs = amdgpu_cs(rcs);
    struct amdgpu_winsys *aws = acs->aws;
    struct amdgpu_userq *userq = &aws->queues[acs->queue_index].userq;
-   uint64_t *cs_preamble_ib_bo_map;
+   uint8_t *cs_preamble_ib_bo_map;
 
    simple_mtx_lock(&userq->lock);
@@ -248,7 +249,6 @@ amdgpu_userq_submit_cs_preamble_ib_once(struct radeon_cmdbuf *rcs, struct ac_pm4
 
    userq->is_cs_preamble_ib_sent = true;
    assert(userq->ip_type == AMD_IP_GFX);
-   assert(!userq->next_wptr);
 
    userq->cs_preamble_ib_bo = amdgpu_bo_create(aws, pm4->ndw * 4, 256, RADEON_DOMAIN_GTT,
                                                RADEON_FLAG_GL2_BYPASS |
@@ -279,7 +279,66 @@ amdgpu_userq_submit_cs_preamble_ib_once(struct radeon_cmdbuf *rcs, struct ac_pm4
    return true;
 }
 
+static bool
+amdgpu_userq_f32_init_reg_shadowing(struct radeon_cmdbuf *rcs, struct ac_pm4_state *pm4)
+{
+   struct amdgpu_cs *acs = amdgpu_cs(rcs);
+   struct amdgpu_winsys *aws = acs->aws;
+   struct amdgpu_userq *userq = &aws->queues[acs->queue_index].userq;
+   uint8_t *shadowing_ib_bo_map;
+
+   simple_mtx_lock(&userq->lock);
+
+   if (userq->f32_is_shadowing_ib_initialized) {
+      simple_mtx_unlock(&userq->lock);
+      return true;
+   }
+
+   userq->f32_is_shadowing_ib_initialized = true;
+   assert(userq->ip_type == AMD_IP_GFX);
+   assert(!userq->next_wptr);
+
+   userq->f32_shadowing_ib_bo = amdgpu_bo_create(aws, pm4->ndw * 4, 256, RADEON_DOMAIN_VRAM,
+                                                 RADEON_FLAG_NO_INTERPROCESS_SHARING);
+   if (!userq->f32_shadowing_ib_bo) {
+      simple_mtx_unlock(&userq->lock);
+      return false;
+   }
+
+   shadowing_ib_bo_map = amdgpu_bo_map(&aws->dummy_sws.base, userq->f32_shadowing_ib_bo, NULL,
+                                       PIPE_MAP_READ | PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED);
+   if (!shadowing_ib_bo_map) {
+      simple_mtx_unlock(&userq->lock);
+      return false;
+   }
+
+   memcpy(shadowing_ib_bo_map, &pm4->pm4, pm4->ndw * 4);
+   userq->f32_shadowing_ib_pm4_dw = pm4->ndw;
+
+   amdgpu_pkt_begin();
+   amdgpu_pkt_add_dw(PKT3(PKT3_INDIRECT_BUFFER, 2, 0));
+   amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->f32_shadowing_ib_bo));
+   amdgpu_pkt_add_dw(amdgpu_bo_get_va(userq->f32_shadowing_ib_bo) >> 32);
+   amdgpu_pkt_add_dw(pm4->ndw | S_3F3_INHERIT_VMID_MQD_GFX(1));
+   amdgpu_pkt_end();
+
+   simple_mtx_unlock(&userq->lock);
+   return true;
+}
+
+static uint64_t
+amdgpu_userq_f32_get_shadow_regs_va(struct radeon_cmdbuf *rcs)
+{
+   struct amdgpu_cs *acs = amdgpu_cs(rcs);
+   struct amdgpu_winsys *aws = acs->aws;
+   struct amdgpu_userq *userq = &aws->queues[acs->queue_index].userq;
+
+   assert(userq->ip_type == AMDGPU_HW_IP_GFX);
+   return amdgpu_bo_get_va(userq->gfx_data.shadow_bo);
+}
+
 void amdgpu_userq_init_functions(struct amdgpu_screen_winsys *sws)
 {
    sws->base.userq_submit_cs_preamble_ib_once = amdgpu_userq_submit_cs_preamble_ib_once;
+   sws->base.userq_f32_init_reg_shadowing = amdgpu_userq_f32_init_reg_shadowing;
+   sws->base.userq_f32_get_shadow_regs_va = amdgpu_userq_f32_get_shadow_regs_va;
 }
@@ -79,6 +79,13 @@ struct amdgpu_userq {
    struct pb_buffer_lean *doorbell_bo;
    uint64_t *doorbell_bo_map;
 
+   /* On gfx11.5, the shadow register addresses have to be initialized with LOAD_* packets,
+    * and for every new IB/job submission the shadowed registers have to be reloaded with
+    * LOAD_* packets.
+    */
+   struct pb_buffer_lean *f32_shadowing_ib_bo;
+   uint32_t f32_shadowing_ib_pm4_dw;
+   bool f32_is_shadowing_ib_initialized;
    struct pb_buffer_lean *cs_preamble_ib_bo;
    bool is_cs_preamble_ib_sent;
    uint32_t userq_handle;