mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-02 12:18:09 +02:00
winsys/amdgpu: use AMDGPU_IB_FLAG_PREAMBLE for the CS preamble on gfx10+
This skips the preamble for following IBs if the queue receives IBs from
the same context back-to-back. This eliminates VGT_FLUSH (for tess and
legacy GS) and PS_PARTIAL_FLUSH (for gfx11) in those cases if the preamble
contains them.
v2: only use this on gfx10+ because the preamble IB causes GPU hangs on
Stoney and older chips have seen only limited testing
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16885>
This commit is contained in:
parent
1592921c59
commit
8edafaa25c
5 changed files with 44 additions and 31 deletions
|
|
@ -196,8 +196,8 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
|
|||
/* Setup preemption. The shadowing preamble will be executed as a preamble IB,
|
||||
* which will load register values from memory on a context switch.
|
||||
*/
|
||||
sctx->ws->cs_setup_preemption(&sctx->gfx_cs, shadowing_preamble->pm4,
|
||||
shadowing_preamble->ndw);
|
||||
sctx->ws->cs_set_preamble(&sctx->gfx_cs, shadowing_preamble->pm4, shadowing_preamble->ndw,
|
||||
false, true);
|
||||
si_pm4_free_state(sctx, shadowing_preamble, ~0);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -440,7 +440,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
|
|||
struct si_pm4_state *preamble = is_secure ? ctx->cs_preamble_state_tmz :
|
||||
ctx->cs_preamble_state;
|
||||
ctx->ws->cs_set_preamble(&ctx->gfx_cs, preamble->pm4, preamble->ndw,
|
||||
preamble != ctx->last_preamble);
|
||||
preamble != ctx->last_preamble, false);
|
||||
ctx->last_preamble = preamble;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -510,23 +510,18 @@ struct radeon_winsys {
|
|||
* the command buffer. If the winsys doesn't support preambles, the packets are inserted
|
||||
* into the command buffer.
|
||||
*
|
||||
* If preemption is enabled, the preamble is also executed when an IB is resumed, which can
|
||||
* happen in the middle of it.
|
||||
*
|
||||
* \param cs Command stream
|
||||
* \param preamble_ib Preamble IB for the context.
|
||||
* \param preamble_num_dw Number of dwords in the preamble IB.
|
||||
* \param preamble_changed Whether the preamble changed or is the same as the last one.
|
||||
* \param enable_preemption If this is true, it also enables preemption.
|
||||
*/
|
||||
void (*cs_set_preamble)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
|
||||
unsigned preamble_num_dw, bool preamble_changed);
|
||||
|
||||
/**
|
||||
* Set up and enable mid command buffer preemption for the command stream.
|
||||
*
|
||||
* \param cs Command stream
|
||||
* \param preamble_ib Non-preemptible preamble IB for the context.
|
||||
* \param preamble_num_dw Number of dwords in the preamble IB.
|
||||
*/
|
||||
bool (*cs_setup_preemption)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
|
||||
unsigned preamble_num_dw);
|
||||
bool (*cs_set_preamble)(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
|
||||
unsigned preamble_num_dw, bool preamble_changed,
|
||||
bool enable_preemption);
|
||||
|
||||
/**
|
||||
* Destroy a command stream.
|
||||
|
|
|
|||
|
|
@ -1027,16 +1027,9 @@ amdgpu_cs_create(struct radeon_cmdbuf *rcs,
|
|||
return true;
|
||||
}
|
||||
|
||||
static void amdgpu_cs_set_preamble(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
|
||||
unsigned preamble_num_dw, bool preamble_changed)
|
||||
{
|
||||
/* TODO: implement this properly */
|
||||
radeon_emit_array(cs, preamble_ib, preamble_num_dw);
|
||||
}
|
||||
|
||||
static bool
|
||||
amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib,
|
||||
unsigned preamble_num_dw)
|
||||
static bool amdgpu_cs_set_preamble(struct radeon_cmdbuf *rcs, const uint32_t *preamble_ib,
|
||||
unsigned preamble_num_dw, bool preamble_changed,
|
||||
bool enable_preemption)
|
||||
{
|
||||
struct amdgpu_cs *cs = amdgpu_cs(rcs);
|
||||
struct amdgpu_winsys *ws = cs->ws;
|
||||
|
|
@ -1045,6 +1038,23 @@ amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_i
|
|||
struct pb_buffer *preamble_bo;
|
||||
uint32_t *map;
|
||||
|
||||
assert(preamble_ib);
|
||||
/* The preamble can be set only once for preemption. */
|
||||
assert(!enable_preemption || !cs->preamble_ib_bo);
|
||||
|
||||
/* The preamble IB causes GPU hangs on Stoney. To be safe, don't use the preamble IB on
|
||||
* chips older than gfx10, and instead paste the preamble into the main command buffer.
|
||||
*/
|
||||
if (ws->info.gfx_level < GFX10) {
|
||||
radeon_emit_array(rcs, preamble_ib, preamble_num_dw);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!preamble_changed && !enable_preemption) {
|
||||
assert(cs->preamble_ib_bo); /* we shouldn't get no-change calls with no preamble */
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Create the preamble IB buffer. */
|
||||
preamble_bo = amdgpu_bo_create(ws, size, ws->info.ib_alignment,
|
||||
RADEON_DOMAIN_VRAM,
|
||||
|
|
@ -1070,15 +1080,20 @@ amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_i
|
|||
map[preamble_num_dw++] = PKT3_NOP_PAD;
|
||||
amdgpu_bo_unmap(&ws->dummy_ws.base, preamble_bo);
|
||||
|
||||
/* Wait until the CS job finishes, so that we don't mess up IB_PREAMBLE while the IB is being
|
||||
* submitted.
|
||||
*/
|
||||
amdgpu_cs_sync_flush(rcs);
|
||||
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
csc[i]->ib[IB_PREAMBLE].va_start = amdgpu_winsys_bo(preamble_bo)->va;
|
||||
csc[i]->ib[IB_PREAMBLE].ib_bytes = preamble_num_dw * 4;
|
||||
|
||||
csc[i]->ib[IB_MAIN].flags |= AMDGPU_IB_FLAG_PREEMPT;
|
||||
if (enable_preemption)
|
||||
csc[i]->ib[IB_MAIN].flags |= AMDGPU_IB_FLAG_PREEMPT;
|
||||
}
|
||||
|
||||
assert(!cs->preamble_ib_bo);
|
||||
cs->preamble_ib_bo = preamble_bo;
|
||||
radeon_bo_reference(&ws->dummy_ws.base, &cs->preamble_ib_bo, preamble_bo);
|
||||
|
||||
amdgpu_cs_add_buffer(rcs, cs->preamble_ib_bo,
|
||||
RADEON_USAGE_READ | RADEON_PRIO_IB, 0);
|
||||
|
|
@ -1841,7 +1856,6 @@ void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws)
|
|||
ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
|
||||
ws->base.cs_create = amdgpu_cs_create;
|
||||
ws->base.cs_set_preamble = amdgpu_cs_set_preamble;
|
||||
ws->base.cs_setup_preemption = amdgpu_cs_setup_preemption;
|
||||
ws->base.cs_destroy = amdgpu_cs_destroy;
|
||||
ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
|
||||
ws->base.cs_validate = amdgpu_cs_validate;
|
||||
|
|
|
|||
|
|
@ -216,11 +216,15 @@ radeon_drm_cs_create(struct radeon_cmdbuf *rcs,
|
|||
return true;
|
||||
}
|
||||
|
||||
static void radeon_drm_cs_set_preamble(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
|
||||
unsigned preamble_num_dw, bool preamble_changed)
|
||||
static bool radeon_drm_cs_set_preamble(struct radeon_cmdbuf *cs, const uint32_t *preamble_ib,
|
||||
unsigned preamble_num_dw, bool preamble_changed,
|
||||
bool enable_preemption)
|
||||
{
|
||||
assert(!enable_preemption);
|
||||
|
||||
/* The radeon kernel driver doesn't support preambles. */
|
||||
radeon_emit_array(cs, preamble_ib, preamble_num_dw);
|
||||
return true;
|
||||
}
|
||||
|
||||
int radeon_lookup_buffer(struct radeon_cs_context *csc, struct radeon_bo *bo)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue