winsys/amdgpu: use AMDGPU_IB_FLAG_PREAMBLE for the CS preamble on gfx10+

This skips the preamble for subsequent IBs when the queue receives IBs from
the same context back-to-back. In those cases, it eliminates VGT_FLUSH (for
tess and legacy GS) and PS_PARTIAL_FLUSH (for gfx11) if the preamble
contains them.

v2: only use this on gfx10+ due to stability issues on Stoney and limited
    testing

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16885>
This commit is contained in:
Marek Olšák 2022-05-14 22:16:16 -04:00 committed by Marge Bot
parent 1592921c59
commit 8edafaa25c
5 changed files with 44 additions and 31 deletions

View file

@ -196,8 +196,8 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
/* Setup preemption. The shadowing preamble will be executed as a preamble IB,
* which will load register values from memory on a context switch.
*/
sctx->ws->cs_setup_preemption(&sctx->gfx_cs, shadowing_preamble->pm4,
shadowing_preamble->ndw);
sctx->ws->cs_set_preamble(&sctx->gfx_cs, shadowing_preamble->pm4, shadowing_preamble->ndw,
false, true);
si_pm4_free_state(sctx, shadowing_preamble, ~0);
}
}

View file

@ -440,7 +440,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
struct si_pm4_state *preamble = is_secure ? ctx->cs_preamble_state_tmz :
ctx->cs_preamble_state;
ctx->ws->cs_set_preamble(&ctx->gfx_cs, preamble->pm4, preamble->ndw,
preamble != ctx->last_preamble);
preamble != ctx->last_preamble, false);
ctx->last_preamble = preamble;
}

View file

@ -510,23 +510,18 @@ struct radeon_winsys {
* the command buffer. If the winsys doesn't support preambles, the packets are inserted
* into the command buffer.
*
* If preemption is enabled, the preamble is also executed when an IB is resumed, which can
* happen in the middle of it.
*
* \param cs Command stream
* \param preamble_ib Preamble IB for the context.
* \param preamble_num_dw Number of dwords in the preamble IB.
* \param preamble_changed Whether the preamble changed or is the same as the last one.
* \param enable_preemption If this is true, it also enables preemption.
*/
void (*cs_set_preamble)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
unsigned preamble_num_dw, bool preamble_changed);
/**
* Set up and enable mid command buffer preemption for the command stream.
*
* \param cs Command stream
* \param preamble_ib Non-preemptible preamble IB for the context.
* \param preamble_num_dw Number of dwords in the preamble IB.
*/
bool (*cs_setup_preemption)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
unsigned preamble_num_dw);
bool (*cs_set_preamble)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
unsigned preamble_num_dw, bool preamble_changed,
bool enable_preemption);
/**
* Destroy a command stream.

View file

@ -1027,16 +1027,9 @@ amdgpu_cs_create(struct radeon_cmdbuf *rcs,
return true;
}
static void amdgpu_cs_set_preamble(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
unsigned preamble_num_dw, bool preamble_changed)
{
/* TODO: implement this properly */
radeon_emit_array(cs, preamble_ib, preamble_num_dw);
}
static bool
amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib,
unsigned preamble_num_dw)
static bool amdgpu_cs_set_preamble(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib,
unsigned preamble_num_dw, bool preamble_changed,
bool enable_preemption)
{
struct amdgpu_cs *cs = amdgpu_cs(rcs);
struct amdgpu_winsys *ws = cs->ws;
@ -1045,6 +1038,23 @@ amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_i
struct pb_buffer *preamble_bo;
uint32_t *map;
assert(preamble_ib);
/* The preamble can be set only once for preemption. */
assert(!enable_preemption || !cs->preamble_ib_bo);
/* The preamble IB causes GPU hangs on Stoney. To be safe, don't use the preamble IB on
* chips older than gfx10, and instead paste the preamble into the main command buffer.
*/
if (ws->info.gfx_level < GFX10) {
radeon_emit_array(rcs, preamble_ib, preamble_num_dw);
return true;
}
if (!preamble_changed && !enable_preemption) {
assert(cs->preamble_ib_bo); /* we shouldn't get no-change calls with no preamble */
return true;
}
/* Create the preamble IB buffer. */
preamble_bo = amdgpu_bo_create(ws, size, ws->info.ib_alignment,
RADEON_DOMAIN_VRAM,
@ -1070,15 +1080,20 @@ amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_i
map[preamble_num_dw++] = PKT3_NOP_PAD;
amdgpu_bo_unmap(&ws->dummy_ws.base, preamble_bo);
/* Wait until the CS job finishes, so that we don't mess up IB_PREAMBLE while the IB is being
* submitted.
*/
amdgpu_cs_sync_flush(rcs);
for (unsigned i = 0; i < 2; i++) {
csc[i]->ib[IB_PREAMBLE].va_start = amdgpu_winsys_bo(preamble_bo)->va;
csc[i]->ib[IB_PREAMBLE].ib_bytes = preamble_num_dw * 4;
csc[i]->ib[IB_MAIN].flags |= AMDGPU_IB_FLAG_PREEMPT;
if (enable_preemption)
csc[i]->ib[IB_MAIN].flags |= AMDGPU_IB_FLAG_PREEMPT;
}
assert(!cs->preamble_ib_bo);
cs->preamble_ib_bo = preamble_bo;
radeon_bo_reference(&ws->dummy_ws.base, &cs->preamble_ib_bo, preamble_bo);
amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo,
RADEON_USAGE_READ | RADEON_PRIO_IB, 0);
@ -1841,7 +1856,6 @@ void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws)
ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
ws->base.cs_create = amdgpu_cs_create;
ws->base.cs_set_preamble = amdgpu_cs_set_preamble;
ws->base.cs_setup_preemption = amdgpu_cs_setup_preemption;
ws->base.cs_destroy = amdgpu_cs_destroy;
ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
ws->base.cs_validate = amdgpu_cs_validate;

View file

@ -216,11 +216,15 @@ radeon_drm_cs_create(struct radeon_cmdbuf *rcs,
return true;
}
static void radeon_drm_cs_set_preamble(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
unsigned preamble_num_dw, bool preamble_changed)
static bool radeon_drm_cs_set_preamble(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
unsigned preamble_num_dw, bool preamble_changed,
bool enable_preemption)
{
assert(!enable_preemption);
/* The radeon kernel driver doesn't support preambles. */
radeon_emit_array(cs, preamble_ib, preamble_num_dw);
return true;
}
int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)