mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-07 04:20:18 +01:00
radeonsi: eliminate redundant SPI_SHADER_PGM_RSRC3/4_GS register writes
The values of these registers don't change much, so most of the writes are redundant.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12343>
This commit is contained in:
parent
3df035d08c
commit
edb5fa4d59
5 changed files with 52 additions and 16 deletions
|
|
@ -259,6 +259,16 @@
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
/* Conditionally emit a register write through radeon_set_sh_reg, using
 * sctx->tracked_regs as a shadow copy of what was last written.
 *
 * sctx:   context whose tracked_regs.reg_saved / reg_value[] are consulted
 *         and updated.
 * offset: register offset forwarded to radeon_set_sh_reg.
 * reg:    tracked-register index; selects the bit in reg_saved and the slot
 *         in reg_value[].
 * val:    value to write; evaluated exactly once (captured in __value).
 *
 * The write is issued only when bit `reg` of reg_saved is clear or when
 * reg_value[reg] differs from `val`. After a write, the bit is set and the
 * value is recorded, so an identical follow-up call emits nothing.
 *
 * `sctx` and `reg` are expanded several times, so pass side-effect-free
 * expressions for them.
 *
 * NOTE(review): the body passes `cs` to radeon_set_sh_reg, but `cs` is not a
 * parameter of this macro; it has to resolve at the expansion site. Confirm
 * how the enclosing radeon_begin/radeon_begin_again scope provides it.
 */
#define radeon_opt_set_sh_reg(sctx, offset, reg, val) do { \
|
||||
unsigned __value = val; \
|
||||
if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
|
||||
sctx->tracked_regs.reg_value[reg] != __value) { \
|
||||
radeon_set_sh_reg(cs, offset, __value); \
|
||||
sctx->tracked_regs.reg_saved |= BITFIELD64_BIT(reg); \
|
||||
sctx->tracked_regs.reg_value[reg] = __value; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define radeon_set_privileged_config_reg(cs, reg, value) do { \
|
||||
assert((reg) < CIK_UCONFIG_REG_OFFSET); \
|
||||
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); \
|
||||
|
|
|
|||
|
|
@ -294,7 +294,7 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx)
|
|||
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x0000001e; /* From GFX8 */
|
||||
|
||||
/* Set all cleared context registers to saved. */
|
||||
ctx->tracked_regs.reg_saved = ~(1ull << SI_TRACKED_GE_PC_ALLOC); /* uconfig reg */
|
||||
ctx->tracked_regs.reg_saved = BITFIELD64_MASK(SI_TRACKED_GE_PC_ALLOC);
|
||||
ctx->last_gs_out_prim = 0; /* cleared by CLEAR_STATE */
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -839,6 +839,8 @@ struct si_shader {
|
|||
unsigned vgt_gs_onchip_cntl;
|
||||
unsigned vgt_gs_max_prims_per_subgroup;
|
||||
unsigned vgt_esgs_ring_itemsize;
|
||||
unsigned spi_shader_pgm_rsrc3_gs;
|
||||
unsigned spi_shader_pgm_rsrc4_gs;
|
||||
} gs;
|
||||
|
||||
struct {
|
||||
|
|
@ -855,6 +857,8 @@ struct si_shader {
|
|||
unsigned pa_cl_ngg_cntl;
|
||||
unsigned vgt_gs_max_vert_out; /* for API GS */
|
||||
unsigned ge_pc_alloc; /* uconfig register */
|
||||
unsigned spi_shader_pgm_rsrc3_gs;
|
||||
unsigned spi_shader_pgm_rsrc4_gs;
|
||||
union si_vgt_stages_key vgt_stages;
|
||||
} ngg;
|
||||
|
||||
|
|
|
|||
|
|
@ -342,7 +342,10 @@ enum si_tracked_reg
|
|||
SI_TRACKED_VGT_TF_PARAM,
|
||||
SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
|
||||
|
||||
/* Non-context registers: */
|
||||
SI_TRACKED_GE_PC_ALLOC,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
|
||||
|
||||
SI_NUM_TRACKED_REGS,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -819,6 +819,20 @@ static void si_emit_shader_gs(struct si_context *sctx)
|
|||
shader->vgt_vertex_reuse_block_cntl);
|
||||
}
|
||||
radeon_end_update_context_roll(sctx);
|
||||
|
||||
/* These don't cause any context rolls. */
|
||||
radeon_begin_again(&sctx->gfx_cs);
|
||||
if (sctx->chip_class >= GFX7) {
|
||||
radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
|
||||
shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs);
|
||||
}
|
||||
if (sctx->chip_class >= GFX10) {
|
||||
radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
|
||||
shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs);
|
||||
}
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
|
||||
|
|
@ -923,13 +937,11 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
|
|||
|
||||
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, rsrc1);
|
||||
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, rsrc2);
|
||||
si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));
|
||||
|
||||
if (sscreen->info.chip_class >= GFX10) {
|
||||
si_pm4_set_reg(pm4, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
S_00B204_CU_EN(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0));
|
||||
}
|
||||
shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs = S_00B21C_CU_EN(0xffff) |
|
||||
S_00B21C_WAVE_LIMIT(0x3F);
|
||||
shader->ctx_reg.gs.spi_shader_pgm_rsrc4_gs =
|
||||
S_00B204_CU_EN(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0);
|
||||
|
||||
shader->ctx_reg.gs.vgt_gs_onchip_cntl =
|
||||
S_028A44_ES_VERTS_PER_SUBGRP(shader->gs_info.es_verts_per_subgroup) |
|
||||
|
|
@ -944,10 +956,9 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
|
|||
|
||||
polaris_set_vgt_vertex_reuse(sscreen, shader->key.part.gs.es, shader);
|
||||
} else {
|
||||
if (sscreen->info.chip_class >= GFX7) {
|
||||
si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F));
|
||||
}
|
||||
shader->ctx_reg.gs.spi_shader_pgm_rsrc3_gs = S_00B21C_CU_EN(0xffff) |
|
||||
S_00B21C_WAVE_LIMIT(0x3F);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
|
||||
si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS,
|
||||
S_00B224_MEM_BASE(sscreen->info.address32_hi >> 8));
|
||||
|
|
@ -1029,6 +1040,15 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader
|
|||
|
||||
/* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */
|
||||
gfx10_emit_ge_pc_alloc(sctx, shader->ctx_reg.ngg.ge_pc_alloc);
|
||||
|
||||
radeon_begin_again(&sctx->gfx_cs);
|
||||
radeon_opt_set_sh_reg(sctx, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC3_GS,
|
||||
shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs);
|
||||
radeon_opt_set_sh_reg(sctx, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
SI_TRACKED_SPI_SHADER_PGM_RSRC4_GS,
|
||||
shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs);
|
||||
radeon_end();
|
||||
}
|
||||
|
||||
static void gfx10_emit_shader_ngg_notess_nogs(struct si_context *sctx)
|
||||
|
|
@ -1218,12 +1238,11 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
S_00B22C_USER_SGPR_MSB_GFX10(num_user_sgprs >> 5) |
|
||||
S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL) |
|
||||
S_00B22C_LDS_SIZE(shader->config.lds_size));
|
||||
si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
|
||||
S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F));
|
||||
|
||||
si_pm4_set_reg(
|
||||
pm4, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
|
||||
S_00B204_CU_EN(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64));
|
||||
shader->ctx_reg.ngg.spi_shader_pgm_rsrc3_gs = S_00B21C_CU_EN(cu_mask) |
|
||||
S_00B21C_WAVE_LIMIT(0x3F);
|
||||
shader->ctx_reg.ngg.spi_shader_pgm_rsrc4_gs =
|
||||
S_00B204_CU_EN(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64);
|
||||
|
||||
nparams = MAX2(shader->info.nr_param_exports, 1);
|
||||
shader->ctx_reg.ngg.spi_vs_out_config =
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue