mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 22:20:14 +01:00
radeonsi/gfx11: program the attribute ring right before draws
This way, we only wait for idle right before draw packets, so that all preceding SET packets can be processed in parallel with draws from the previous IB.

Add a new state atom that is emitted last. It only contains code for gfx11, but some code for older chips will be added by the next commit.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27943>
This commit is contained in:
parent
9e08569d6a
commit
b9b7d34d05
4 changed files with 49 additions and 40 deletions
|
|
@ -411,6 +411,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
|
|||
ctx->flags |= SI_CONTEXT_VGT_FLUSH;
|
||||
|
||||
si_mark_atom_dirty(ctx, &ctx->atoms.s.cache_flush);
|
||||
si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_ge_ring_state);
|
||||
|
||||
if (ctx->screen->attribute_ring) {
|
||||
radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->screen->attribute_ring,
|
||||
|
|
|
|||
|
|
@ -6467,46 +6467,6 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx)
|
|||
PIXEL_PIPE_STATE_CNTL_STRIDE(2) |
|
||||
PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(rb_mask));
|
||||
si_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(rb_mask));
|
||||
|
||||
/* We must wait for idle using an EOP event before changing the attribute ring registers.
|
||||
* Use the bottom-of-pipe EOP event, but increment the PWS counter instead of writing memory.
|
||||
*/
|
||||
si_pm4_cmd_add(pm4, PKT3(PKT3_RELEASE_MEM, 6, 0));
|
||||
si_pm4_cmd_add(pm4, S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) |
|
||||
S_490_EVENT_INDEX(5) |
|
||||
S_490_PWS_ENABLE(1));
|
||||
si_pm4_cmd_add(pm4, 0); /* DST_SEL, INT_SEL, DATA_SEL */
|
||||
si_pm4_cmd_add(pm4, 0); /* ADDRESS_LO */
|
||||
si_pm4_cmd_add(pm4, 0); /* ADDRESS_HI */
|
||||
si_pm4_cmd_add(pm4, 0); /* DATA_LO */
|
||||
si_pm4_cmd_add(pm4, 0); /* DATA_HI */
|
||||
si_pm4_cmd_add(pm4, 0); /* INT_CTXID */
|
||||
|
||||
/* Wait for the PWS counter. */
|
||||
si_pm4_cmd_add(pm4, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
|
||||
si_pm4_cmd_add(pm4, S_580_PWS_STAGE_SEL(V_580_CP_ME) |
|
||||
S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) |
|
||||
S_580_PWS_ENA2(1) |
|
||||
S_580_PWS_COUNT(0));
|
||||
si_pm4_cmd_add(pm4, 0xffffffff); /* GCR_SIZE */
|
||||
si_pm4_cmd_add(pm4, 0x01ffffff); /* GCR_SIZE_HI */
|
||||
si_pm4_cmd_add(pm4, 0); /* GCR_BASE_LO */
|
||||
si_pm4_cmd_add(pm4, 0); /* GCR_BASE_HI */
|
||||
si_pm4_cmd_add(pm4, S_585_PWS_ENA(1));
|
||||
si_pm4_cmd_add(pm4, 0); /* GCR_CNTL */
|
||||
|
||||
si_pm4_set_reg(pm4, R_031110_SPI_GS_THROTTLE_CNTL1, 0x12355123);
|
||||
si_pm4_set_reg(pm4, R_031114_SPI_GS_THROTTLE_CNTL2, 0x1544D);
|
||||
|
||||
assert((sscreen->attribute_ring->gpu_address >> 32) == sscreen->info.address32_hi);
|
||||
|
||||
/* The PS will read inputs from this address. */
|
||||
si_pm4_set_reg(pm4, R_031118_SPI_ATTRIBUTE_RING_BASE,
|
||||
sscreen->attribute_ring->gpu_address >> 16);
|
||||
si_pm4_set_reg(pm4, R_03111C_SPI_ATTRIBUTE_RING_SIZE,
|
||||
S_03111C_MEM_SIZE((sscreen->info.attribute_ring_size_per_se >> 16) - 1) |
|
||||
S_03111C_BIG_PAGE(sscreen->info.discardable_allows_big_page) |
|
||||
S_03111C_L1_POLICY(1));
|
||||
}
|
||||
|
||||
done:
|
||||
|
|
|
|||
|
|
@ -237,6 +237,7 @@ union si_state_atoms {
|
|||
struct si_atom cache_flush;
|
||||
struct si_atom streamout_begin; /* this must be done after cache_flush */
|
||||
struct si_atom render_cond; /* this must be after cache_flush */
|
||||
struct si_atom spi_ge_ring_state; /* this must be last because it waits for idle. */
|
||||
} s;
|
||||
struct si_atom array[sizeof(struct si_atoms_s) / sizeof(struct si_atom)];
|
||||
};
|
||||
|
|
|
|||
|
|
@ -4872,11 +4872,58 @@ static void si_emit_spi_map(struct si_context *sctx, unsigned index)
|
|||
radeon_end_update_context_roll(sctx);
|
||||
}
|
||||
|
||||
/* Emit callback for the spi_ge_ring_state atom (installed in
 * si_init_shader_functions via sctx->atoms.s.spi_ge_ring_state.emit).
 *
 * On GFX11 and newer, this writes directly into sctx->gfx_cs:
 *   1. a RELEASE_MEM / ACQUIRE_MEM pair that waits for idle through the PWS
 *      counter, and
 *   2. the SPI_GS_THROTTLE_CNTL1/2 and SPI_ATTRIBUTE_RING_BASE/SIZE registers.
 *
 * On older gfx levels nothing is emitted.
 *
 * The atom is declared last in the atom list because it waits for idle
 * (see the comment on spi_ge_ring_state in union si_state_atoms).
 *
 * \param sctx   context whose gfx command stream receives the packets
 * \param index  not referenced by this function
 */
static void si_emit_spi_ge_ring_state(struct si_context *sctx, unsigned index)
|
||||
{
|
||||
struct si_screen *sscreen = sctx->screen;
|
||||
|
||||
/* Only GFX11+ has anything to emit here. */
if (sctx->gfx_level >= GFX11) {
|
||||
radeon_begin(&sctx->gfx_cs);
|
||||
/* We must wait for idle using an EOP event before changing the attribute ring registers.
|
||||
* Use the bottom-of-pipe EOP event, but increment the PWS counter instead of writing memory.
|
||||
*/
|
||||
radeon_emit(PKT3(PKT3_RELEASE_MEM, 6, 0));
|
||||
radeon_emit(S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) |
|
||||
S_490_EVENT_INDEX(5) |
|
||||
S_490_PWS_ENABLE(1));
|
||||
radeon_emit(0); /* DST_SEL, INT_SEL, DATA_SEL */
|
||||
radeon_emit(0); /* ADDRESS_LO */
|
||||
radeon_emit(0); /* ADDRESS_HI */
|
||||
radeon_emit(0); /* DATA_LO */
|
||||
radeon_emit(0); /* DATA_HI */
|
||||
radeon_emit(0); /* INT_CTXID */
|
||||
|
||||
/* Wait for the PWS counter. */
|
||||
radeon_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0));
|
||||
radeon_emit(S_580_PWS_STAGE_SEL(V_580_CP_ME) |
|
||||
S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) |
|
||||
S_580_PWS_ENA2(1) |
|
||||
S_580_PWS_COUNT(0));
|
||||
radeon_emit(0xffffffff); /* GCR_SIZE */
|
||||
radeon_emit(0x01ffffff); /* GCR_SIZE_HI */
|
||||
radeon_emit(0); /* GCR_BASE_LO */
|
||||
radeon_emit(0); /* GCR_BASE_HI */
|
||||
radeon_emit(S_585_PWS_ENA(1));
|
||||
radeon_emit(0); /* GCR_CNTL */
|
||||
|
||||
/* The upper 32 bits of the ring address are not written below (only
 * gpu_address >> 16 is), so check that they equal address32_hi.
 */
assert((sscreen->attribute_ring->gpu_address >> 32) == sscreen->info.address32_hi);
|
||||
|
||||
/* One register sequence of 4 dwords starting at SPI_GS_THROTTLE_CNTL1;
 * the trailing comment on each dword names the register it is meant for.
 */
radeon_set_uconfig_reg_seq(R_031110_SPI_GS_THROTTLE_CNTL1, 4);
|
||||
radeon_emit(0x12355123); /* SPI_GS_THROTTLE_CNTL1 */
|
||||
radeon_emit(0x1544D); /* SPI_GS_THROTTLE_CNTL2 */
|
||||
radeon_emit(sscreen->attribute_ring->gpu_address >> 16); /* SPI_ATTRIBUTE_RING_BASE */
|
||||
radeon_emit(S_03111C_MEM_SIZE((sscreen->info.attribute_ring_size_per_se >> 16) - 1) |
|
||||
S_03111C_BIG_PAGE(sscreen->info.discardable_allows_big_page) |
|
||||
S_03111C_L1_POLICY(1)); /* SPI_ATTRIBUTE_RING_SIZE */
|
||||
radeon_end();
|
||||
}
|
||||
}
|
||||
|
||||
void si_init_shader_functions(struct si_context *sctx)
|
||||
{
|
||||
sctx->atoms.s.vgt_pipeline_state.emit = si_emit_vgt_pipeline_state;
|
||||
sctx->atoms.s.scratch_state.emit = si_emit_scratch_state;
|
||||
sctx->atoms.s.tess_io_layout.emit = si_emit_tess_io_layout_state;
|
||||
sctx->atoms.s.spi_ge_ring_state.emit = si_emit_spi_ge_ring_state;
|
||||
|
||||
sctx->b.create_vs_state = si_create_shader;
|
||||
sctx->b.create_tcs_state = si_create_shader;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue