diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index ea493836ce8..f4e93d8b4ec 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -411,6 +411,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) ctx->flags |= SI_CONTEXT_VGT_FLUSH; si_mark_atom_dirty(ctx, &ctx->atoms.s.cache_flush); + si_mark_atom_dirty(ctx, &ctx->atoms.s.spi_ge_ring_state); if (ctx->screen->attribute_ring) { radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->screen->attribute_ring, diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 464fcf6b716..7f0a4c500d7 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -6467,46 +6467,6 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx) PIXEL_PIPE_STATE_CNTL_STRIDE(2) | PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(rb_mask)); si_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(rb_mask)); - - /* We must wait for idle using an EOP event before changing the attribute ring registers. - * Use the bottom-of-pipe EOP event, but increment the PWS counter instead of writing memory. - */ - si_pm4_cmd_add(pm4, PKT3(PKT3_RELEASE_MEM, 6, 0)); - si_pm4_cmd_add(pm4, S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | - S_490_EVENT_INDEX(5) | - S_490_PWS_ENABLE(1)); - si_pm4_cmd_add(pm4, 0); /* DST_SEL, INT_SEL, DATA_SEL */ - si_pm4_cmd_add(pm4, 0); /* ADDRESS_LO */ - si_pm4_cmd_add(pm4, 0); /* ADDRESS_HI */ - si_pm4_cmd_add(pm4, 0); /* DATA_LO */ - si_pm4_cmd_add(pm4, 0); /* DATA_HI */ - si_pm4_cmd_add(pm4, 0); /* INT_CTXID */ - - /* Wait for the PWS counter. */ - si_pm4_cmd_add(pm4, PKT3(PKT3_ACQUIRE_MEM, 6, 0)); - si_pm4_cmd_add(pm4, S_580_PWS_STAGE_SEL(V_580_CP_ME) | - S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) | - S_580_PWS_ENA2(1) | - S_580_PWS_COUNT(0)); - si_pm4_cmd_add(pm4, 0xffffffff); /* GCR_SIZE */ - si_pm4_cmd_add(pm4, 0x01ffffff); /* GCR_SIZE_HI */ - si_pm4_cmd_add(pm4, 0); /* GCR_BASE_LO */ - si_pm4_cmd_add(pm4, 0); /* GCR_BASE_HI */ - si_pm4_cmd_add(pm4, S_585_PWS_ENA(1)); - si_pm4_cmd_add(pm4, 0); /* GCR_CNTL */ - - si_pm4_set_reg(pm4, R_031110_SPI_GS_THROTTLE_CNTL1, 0x12355123); - si_pm4_set_reg(pm4, R_031114_SPI_GS_THROTTLE_CNTL2, 0x1544D); - - assert((sscreen->attribute_ring->gpu_address >> 32) == sscreen->info.address32_hi); - - /* The PS will read inputs from this address. */ - si_pm4_set_reg(pm4, R_031118_SPI_ATTRIBUTE_RING_BASE, - sscreen->attribute_ring->gpu_address >> 16); - si_pm4_set_reg(pm4, R_03111C_SPI_ATTRIBUTE_RING_SIZE, - S_03111C_MEM_SIZE((sscreen->info.attribute_ring_size_per_se >> 16) - 1) | - S_03111C_BIG_PAGE(sscreen->info.discardable_allows_big_page) | - S_03111C_L1_POLICY(1)); } done: diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index ae4e9b584aa..c9e923d665d 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -237,6 +237,7 @@ union si_state_atoms { struct si_atom cache_flush; struct si_atom streamout_begin; /* this must be done after cache_flush */ struct si_atom render_cond; /* this must be after cache_flush */ + struct si_atom spi_ge_ring_state; /* this must be last because it waits for idle. */ } s; struct si_atom array[sizeof(struct si_atoms_s) / sizeof(struct si_atom)]; }; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 266e8b2b105..d3a7e4c2d96 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -4872,11 +4872,58 @@ static void si_emit_spi_map(struct si_context *sctx, unsigned index) radeon_end_update_context_roll(sctx); } +static void si_emit_spi_ge_ring_state(struct si_context *sctx, unsigned index) +{ + struct si_screen *sscreen = sctx->screen; + + if (sctx->gfx_level >= GFX11) { + radeon_begin(&sctx->gfx_cs); + /* We must wait for idle using an EOP event before changing the attribute ring registers. + * Use the bottom-of-pipe EOP event, but increment the PWS counter instead of writing memory. + */ + radeon_emit(PKT3(PKT3_RELEASE_MEM, 6, 0)); + radeon_emit(S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | + S_490_EVENT_INDEX(5) | + S_490_PWS_ENABLE(1)); + radeon_emit(0); /* DST_SEL, INT_SEL, DATA_SEL */ + radeon_emit(0); /* ADDRESS_LO */ + radeon_emit(0); /* ADDRESS_HI */ + radeon_emit(0); /* DATA_LO */ + radeon_emit(0); /* DATA_HI */ + radeon_emit(0); /* INT_CTXID */ + + /* Wait for the PWS counter. */ + radeon_emit(PKT3(PKT3_ACQUIRE_MEM, 6, 0)); + radeon_emit(S_580_PWS_STAGE_SEL(V_580_CP_ME) | + S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) | + S_580_PWS_ENA2(1) | + S_580_PWS_COUNT(0)); + radeon_emit(0xffffffff); /* GCR_SIZE */ + radeon_emit(0x01ffffff); /* GCR_SIZE_HI */ + radeon_emit(0); /* GCR_BASE_LO */ + radeon_emit(0); /* GCR_BASE_HI */ + radeon_emit(S_585_PWS_ENA(1)); + radeon_emit(0); /* GCR_CNTL */ + + assert((sscreen->attribute_ring->gpu_address >> 32) == sscreen->info.address32_hi); + + radeon_set_uconfig_reg_seq(R_031110_SPI_GS_THROTTLE_CNTL1, 4); + radeon_emit(0x12355123); /* SPI_GS_THROTTLE_CNTL1 */ + radeon_emit(0x1544D); /* SPI_GS_THROTTLE_CNTL2 */ + radeon_emit(sscreen->attribute_ring->gpu_address >> 16); /* SPI_ATTRIBUTE_RING_BASE */ + radeon_emit(S_03111C_MEM_SIZE((sscreen->info.attribute_ring_size_per_se >> 16) - 1) | + S_03111C_BIG_PAGE(sscreen->info.discardable_allows_big_page) | + S_03111C_L1_POLICY(1)); /* SPI_ATTRIBUTE_RING_SIZE */ + radeon_end(); + } +} + void si_init_shader_functions(struct si_context *sctx) { sctx->atoms.s.vgt_pipeline_state.emit = si_emit_vgt_pipeline_state; sctx->atoms.s.scratch_state.emit = si_emit_scratch_state; sctx->atoms.s.tess_io_layout.emit = si_emit_tess_io_layout_state; + sctx->atoms.s.spi_ge_ring_state.emit = si_emit_spi_ge_ring_state; sctx->b.create_vs_state = si_create_shader; sctx->b.create_tcs_state = si_create_shader;