radv: stop using set_sh_reg_idx(3) to reduce CP overhead

radeonsi doesn't use it either.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/42202>
This commit is contained in:
Marek Olšák 2026-06-11 21:06:40 -04:00 committed by Marge Bot
parent d38eb680a2
commit 4f2fa4d2b8
3 changed files with 13 additions and 24 deletions

View file

@ -356,15 +356,6 @@ struct ac_tracked_regs {
/* Set one SH register: forwards to __ac_cmdbuf_set_reg with index 0, the SI_SH
 * register range and the PKT3_SET_SH_REG opcode.
 */
#define ac_cmdbuf_set_sh_reg(reg, value) __ac_cmdbuf_set_reg(reg, 0, value, SI_SH, PKT3_SET_SH_REG)
/* Set one SH register with a non-zero index.
 *
 * info:  pointer whose gfx_level field selects the packet opcode.
 * reg:   SH register to write.
 * idx:   index forwarded to __ac_cmdbuf_set_reg; must be non-zero (asserted).
 * value: value to write.
 *
 * PKT3_SET_SH_REG_INDEX is used by default; when gfx_level is below GFX10 the
 * macro falls back to plain PKT3_SET_SH_REG.
 *
 * Note: idx is expanded twice (once in the assert, once in the call), so it
 * should not have side effects. The do/while(0) wrapper lets the macro be used
 * as a single statement.
 */
#define ac_cmdbuf_set_sh_reg_idx(info, reg, idx, value) \
do { \
assert((idx)); \
unsigned __opcode = PKT3_SET_SH_REG_INDEX; \
if ((info)->gfx_level < GFX10) \
__opcode = PKT3_SET_SH_REG; \
__ac_cmdbuf_set_reg(reg, idx, value, SI_SH, __opcode); \
} while (0)
#define ac_cmdbuf_emit_32bit_pointer(sh_offset, va, info) \
do { \
assert((va) == 0 || ((va) >> 32) == (info)->address32_hi); \

View file

@ -2953,8 +2953,9 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
radeon_opt_set_context_reg(R_028AB4_VGT_REUSE_OFF, AC_TRACKED_VGT_REUSE_OFF, shader->regs.vs.vgt_reuse_off);
if (pdev->info.gfx_level >= GFX7) {
radeon_set_sh_reg_idx(&pdev->info, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3, shader->regs.vs.spi_shader_pgm_rsrc3_vs);
radeon_set_sh_reg(R_00B11C_SPI_SHADER_LATE_ALLOC_VS, shader->regs.vs.spi_shader_late_alloc_vs);
radeon_set_sh_reg_seq(R_00B118_SPI_SHADER_PGM_RSRC3_VS, 2);
radeon_emit(shader->regs.vs.spi_shader_pgm_rsrc3_vs);
radeon_emit(shader->regs.vs.spi_shader_late_alloc_vs);
if (pdev->info.gfx_level >= GFX10) {
radeon_set_uconfig_reg(R_030980_GE_PC_ALLOC, shader->regs.ge_pc_alloc);
@ -3040,7 +3041,7 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
radeon_emit(shader->config.rsrc1);
radeon_emit(shader->config.rsrc2);
if (pdev->info.gfx_level >= GFX11)
radeon_set_sh_reg_idx(&pdev->info, shader->regs.pgm_rsrc4, 3, shader->regs.spi_shader_pgm_rsrc4_gs_hs);
radeon_set_sh_reg(shader->regs.pgm_rsrc4, shader->regs.spi_shader_pgm_rsrc4_gs_hs);
}
radeon_end();
}
@ -3145,11 +3146,10 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
gfx12_push_sh_reg(ngg_lds_layout_offset,
SET_SGPR_FIELD(NGG_LDS_LAYOUT_GS_OUT_VERTEX_BASE, shader->info.ngg_info.esgs_ring_size));
} else {
radeon_set_sh_reg_idx(&pdev->info, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3, shader->regs.spi_shader_pgm_rsrc3_gs);
radeon_set_sh_reg(R_00B21C_SPI_SHADER_PGM_RSRC3_GS, shader->regs.spi_shader_pgm_rsrc3_gs);
if (pdev->info.gfx_level < GFX11)
radeon_set_sh_reg_idx(&pdev->info, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
shader->regs.spi_shader_pgm_rsrc4_gs_hs);
radeon_set_sh_reg(R_00B204_SPI_SHADER_PGM_RSRC4_GS, shader->regs.spi_shader_pgm_rsrc4_gs_hs);
radeon_set_uconfig_reg(R_030980_GE_PC_ALLOC, shader->regs.ge_pc_alloc);
@ -3178,7 +3178,7 @@ radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
radeon_set_sh_reg(shader->regs.pgm_lo, va >> 8);
radeon_set_sh_reg(shader->regs.pgm_rsrc1, shader->config.rsrc1);
if (pdev->info.gfx_level >= GFX11)
radeon_set_sh_reg_idx(&pdev->info, shader->regs.pgm_rsrc4, 3, shader->regs.spi_shader_pgm_rsrc4_gs_hs);
radeon_set_sh_reg(shader->regs.pgm_rsrc4, shader->regs.spi_shader_pgm_rsrc4_gs_hs);
} else {
radeon_set_sh_reg_seq(shader->regs.pgm_lo, 4);
radeon_emit(va >> 8);
@ -3240,7 +3240,7 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer)
radeon_emit(rsrc2);
}
if (pdev->info.gfx_level >= GFX11)
radeon_set_sh_reg_idx(&pdev->info, vs->regs.pgm_rsrc4, 3, rsrc4);
radeon_set_sh_reg(vs->regs.pgm_rsrc4, rsrc4);
}
}
radeon_end();
@ -3304,7 +3304,7 @@ radv_emit_tess_eval_shader(struct radv_cmd_buffer *cmd_buffer)
radeon_emit(rsrc1);
radeon_emit(rsrc2);
if (pdev->info.gfx_level >= GFX11)
radeon_set_sh_reg_idx(&pdev->info, tes->regs.pgm_rsrc4, 3, rsrc4);
radeon_set_sh_reg(tes->regs.pgm_rsrc4, rsrc4);
radeon_emit_32bit_pointer(next_stage_pc_offset, gs->va, &pdev->info);
}
radeon_end();
@ -3377,11 +3377,11 @@ radv_emit_hw_gs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *gs
}
if (pdev->info.gfx_level >= GFX7) {
radeon_set_sh_reg_idx(&pdev->info, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3, gs->regs.spi_shader_pgm_rsrc3_gs);
radeon_set_sh_reg(R_00B21C_SPI_SHADER_PGM_RSRC3_GS, gs->regs.spi_shader_pgm_rsrc3_gs);
}
if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg_idx(&pdev->info, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3, gs->regs.spi_shader_pgm_rsrc4_gs_hs);
radeon_set_sh_reg(R_00B204_SPI_SHADER_PGM_RSRC4_GS, gs->regs.spi_shader_pgm_rsrc4_gs_hs);
}
radeon_end();
@ -3706,7 +3706,7 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer)
radeon_emit(ps->config.rsrc2);
if (pdev->info.gfx_level >= GFX11)
radeon_set_sh_reg_idx(&pdev->info, R_00B004_SPI_SHADER_PGM_RSRC4_PS, 3, ps->regs.ps.spi_shader_pgm_rsrc4_ps);
radeon_set_sh_reg(R_00B004_SPI_SHADER_PGM_RSRC4_PS, ps->regs.ps.spi_shader_pgm_rsrc4_ps);
}
radeon_end();
@ -6134,7 +6134,7 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v
} else {
rsrc4 = (rsrc4 & C_00B404_INST_PREF_SIZE) | S_00B404_INST_PREF_SIZE(prolog->inst_pref_size);
}
radeon_set_sh_reg_idx(&pdev->info, vs_shader->regs.pgm_rsrc4, 3, rsrc4);
radeon_set_sh_reg(vs_shader->regs.pgm_rsrc4, rsrc4);
}
}
radeon_end();

View file

@ -83,8 +83,6 @@ radeon_check_space(struct radeon_winsys *ws, struct ac_cmdbuf *cs, unsigned need
/* Alias that forwards unchanged to ac_cmdbuf_set_sh_reg(reg, value). */
#define radeon_set_sh_reg(reg, value) ac_cmdbuf_set_sh_reg(reg, value)
/* Alias that forwards unchanged to ac_cmdbuf_set_sh_reg_idx(info, reg, idx, value). */
#define radeon_set_sh_reg_idx(info, reg, idx, value) ac_cmdbuf_set_sh_reg_idx(info, reg, idx, value)
/* Packet building helpers for UCONFIG registers. */
/* Alias that forwards unchanged to ac_cmdbuf_set_ucfg_reg_seq(reg, num). */
#define radeon_set_uconfig_reg_seq(reg, num) ac_cmdbuf_set_ucfg_reg_seq(reg, num)