radv: precompute more PGM registers for all stages

This is less error-prone and easier to update for new hardware.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31115>
This commit is contained in:
Samuel Pitoiset 2024-09-10 16:01:37 +02:00 committed by Marge Bot
parent c7a509f55c
commit 190d46b65d
4 changed files with 151 additions and 128 deletions

View file

@ -35,15 +35,15 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv
va = reloc->va[MESA_SHADER_VERTEX];
if (vs->info.vs.as_ls) {
radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
radeon_set_sh_reg(cs, vs->info.regs.pgm_lo, va >> 8);
} else if (vs->info.vs.as_es) {
radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
radeon_set_sh_reg_seq(cs, vs->info.regs.pgm_lo, 2);
radeon_emit(cs, va >> 8);
radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
} else if (vs->info.is_ngg) {
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
radeon_set_sh_reg(cs, vs->info.regs.pgm_lo, va >> 8);
} else {
radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 2);
radeon_set_sh_reg_seq(cs, vs->info.regs.pgm_lo, 2);
radeon_emit(cs, va >> 8);
radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
}
@ -51,18 +51,14 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv
/* TCS */
if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]) {
const struct radv_shader *tcs = pipeline->base.shaders[MESA_SHADER_TESS_CTRL];
va = reloc->va[MESA_SHADER_TESS_CTRL];
if (gfx_level >= GFX9) {
if (gfx_level >= GFX12) {
radeon_set_sh_reg(cs, R_00B424_SPI_SHADER_PGM_LO_LS, va >> 8);
} else if (gfx_level >= GFX10) {
radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
} else {
radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
}
radeon_set_sh_reg(cs, tcs->info.regs.pgm_lo, va >> 8);
} else {
radeon_set_sh_reg_seq(cs, R_00B420_SPI_SHADER_PGM_LO_HS, 2);
radeon_set_sh_reg_seq(cs, tcs->info.regs.pgm_lo, 2);
radeon_emit(cs, va >> 8);
radeon_emit(cs, S_00B424_MEM_BASE(va >> 40));
}
@ -74,17 +70,13 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv
va = reloc->va[MESA_SHADER_TESS_EVAL];
if (tes->info.is_ngg) {
if (gfx_level >= GFX12) {
radeon_set_sh_reg(cs, R_00B224_SPI_SHADER_PGM_LO_ES, va >> 8);
} else {
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
}
radeon_set_sh_reg(cs, tes->info.regs.pgm_lo, va >> 8);
} else if (tes->info.tes.as_es) {
radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
radeon_set_sh_reg_seq(cs, tes->info.regs.pgm_lo, 2);
radeon_emit(cs, va >> 8);
radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
} else {
radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 2);
radeon_set_sh_reg_seq(cs, tes->info.regs.pgm_lo, 2);
radeon_emit(cs, va >> 8);
radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
}
@ -96,20 +88,12 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv
va = reloc->va[MESA_SHADER_GEOMETRY];
if (gs->info.is_ngg) {
if (gfx_level >= GFX12) {
radeon_set_sh_reg(cs, R_00B224_SPI_SHADER_PGM_LO_ES, va >> 8);
} else {
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
}
radeon_set_sh_reg(cs, gs->info.regs.pgm_lo, va >> 8);
} else {
if (gfx_level >= GFX9) {
if (gfx_level >= GFX10) {
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
} else {
radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
}
radeon_set_sh_reg(cs, gs->info.regs.pgm_lo, va >> 8);
} else {
radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 2);
radeon_set_sh_reg_seq(cs, gs->info.regs.pgm_lo, 2);
radeon_emit(cs, va >> 8);
radeon_emit(cs, S_00B224_MEM_BASE(va >> 40));
}
@ -118,22 +102,22 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv
/* FS */
if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]) {
const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
va = reloc->va[MESA_SHADER_FRAGMENT];
radeon_set_sh_reg_seq(cs, R_00B020_SPI_SHADER_PGM_LO_PS, 2);
radeon_set_sh_reg_seq(cs, ps->info.regs.pgm_lo, 2);
radeon_emit(cs, va >> 8);
radeon_emit(cs, S_00B024_MEM_BASE(va >> 40));
}
/* MS */
if (pipeline->base.shaders[MESA_SHADER_MESH]) {
const struct radv_shader *ms = pipeline->base.shaders[MESA_SHADER_MESH];
va = reloc->va[MESA_SHADER_MESH];
if (pdev->info.gfx_level >= GFX12) {
radeon_set_sh_reg(cs, R_00B224_SPI_SHADER_PGM_LO_ES, va >> 8);
} else {
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
}
radeon_set_sh_reg(cs, ms->info.regs.pgm_lo, va >> 8);
}
}

View file

@ -1957,7 +1957,7 @@ radv_emit_ps_epilog_state(struct radv_cmd_buffer *cmd_buffer, struct radv_shader
if (G_00B848_VGPRS(ps_epilog->rsrc1) > G_00B848_VGPRS(ps_shader->config.rsrc1)) {
uint32_t rsrc1 = ps_shader->config.rsrc1;
rsrc1 = (rsrc1 & C_00B848_VGPRS) | (ps_epilog->rsrc1 & ~C_00B848_VGPRS);
radeon_set_sh_reg(cmd_buffer->cs, R_00B028_SPI_SHADER_PGM_RSRC1_PS, rsrc1);
radeon_set_sh_reg(cmd_buffer->cs, ps_shader->info.regs.pgm_rsrc1, rsrc1);
}
radv_emit_epilog(cmd_buffer, ps_shader, ps_epilog);
@ -1971,13 +1971,13 @@ radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_
{
uint64_t va = radv_shader_get_va(shader);
radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, va >> 8);
radeon_set_sh_reg(cs, shader->info.regs.pgm_lo, va >> 8);
radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
radeon_set_sh_reg_seq(cs, shader->info.regs.pgm_rsrc1, 2);
radeon_emit(cs, shader->config.rsrc1);
radeon_emit(cs, shader->config.rsrc2);
if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3);
radeon_set_sh_reg(cs, shader->info.regs.pgm_rsrc3, shader->config.rsrc3);
}
radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, shader->info.regs.cs.compute_resource_limits);
@ -2018,7 +2018,7 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint64_t va = radv_shader_get_va(shader);
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4);
radeon_set_sh_reg_seq(cmd_buffer->cs, shader->info.regs.pgm_lo, 4);
radeon_emit(cmd_buffer->cs, va >> 8);
radeon_emit(cmd_buffer->cs, S_00B124_MEM_BASE(va >> 40));
radeon_emit(cmd_buffer->cs, shader->config.rsrc1);
@ -2057,7 +2057,7 @@ radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
{
const uint64_t va = radv_shader_get_va(shader);
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4);
radeon_set_sh_reg_seq(cmd_buffer->cs, shader->info.regs.pgm_lo, 4);
radeon_emit(cmd_buffer->cs, va >> 8);
radeon_emit(cmd_buffer->cs, S_00B324_MEM_BASE(va >> 40));
radeon_emit(cmd_buffer->cs, shader->config.rsrc1);
@ -2069,9 +2069,9 @@ radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
{
const uint64_t va = radv_shader_get_va(shader);
radeon_set_sh_reg(cmd_buffer->cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
radeon_set_sh_reg(cmd_buffer->cs, shader->info.regs.pgm_lo, va >> 8);
radeon_set_sh_reg(cmd_buffer->cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, shader->config.rsrc1);
radeon_set_sh_reg(cmd_buffer->cs, shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
}
static void
@ -2094,13 +2094,9 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
}
if (!shader->info.merged_shader_compiled_separately) {
if (pdev->info.gfx_level >= GFX12) {
radeon_set_sh_reg(cmd_buffer->cs, R_00B224_SPI_SHADER_PGM_LO_ES, va >> 8);
} else {
radeon_set_sh_reg(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
}
radeon_set_sh_reg(cmd_buffer->cs, shader->info.regs.pgm_lo, va >> 8);
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
radeon_set_sh_reg_seq(cmd_buffer->cs, shader->info.regs.pgm_rsrc1, 2);
radeon_emit(cmd_buffer->cs, shader->config.rsrc1);
radeon_emit(cmd_buffer->cs, shader->config.rsrc2);
}
@ -2197,17 +2193,10 @@ radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
const uint64_t va = radv_shader_get_va(shader);
if (pdev->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX12) {
radeon_set_sh_reg(cmd_buffer->cs, R_00B424_SPI_SHADER_PGM_LO_LS, va >> 8);
} else if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg(cmd_buffer->cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
} else {
radeon_set_sh_reg(cmd_buffer->cs, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
}
radeon_set_sh_reg(cmd_buffer->cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, shader->config.rsrc1);
radeon_set_sh_reg(cmd_buffer->cs, shader->info.regs.pgm_lo, va >> 8);
radeon_set_sh_reg(cmd_buffer->cs, shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
} else {
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4);
radeon_set_sh_reg_seq(cmd_buffer->cs, shader->info.regs.pgm_lo, 4);
radeon_emit(cmd_buffer->cs, va >> 8);
radeon_emit(cmd_buffer->cs, S_00B424_MEM_BASE(va >> 40));
radeon_emit(cmd_buffer->cs, shader->config.rsrc1);
@ -2234,29 +2223,15 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer)
if (!vs->info.vs.has_prolog) {
uint32_t rsrc1, rsrc2;
radeon_set_sh_reg(cmd_buffer->cs, vs->info.regs.pgm_lo, vs->va >> 8);
if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) {
radv_shader_combine_cfg_vs_tcs(vs, next_stage, &rsrc1, NULL);
if (pdev->info.gfx_level >= GFX12) {
radeon_set_sh_reg(cmd_buffer->cs, R_00B424_SPI_SHADER_PGM_LO_LS, vs->va >> 8);
} else if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg(cmd_buffer->cs, R_00B520_SPI_SHADER_PGM_LO_LS, vs->va >> 8);
} else {
radeon_set_sh_reg(cmd_buffer->cs, R_00B410_SPI_SHADER_PGM_LO_LS, vs->va >> 8);
}
radeon_set_sh_reg(cmd_buffer->cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, rsrc1);
radeon_set_sh_reg(cmd_buffer->cs, vs->info.regs.pgm_rsrc1, rsrc1);
} else {
radv_shader_combine_cfg_vs_gs(vs, next_stage, &rsrc1, &rsrc2);
if (pdev->info.gfx_level >= GFX12) {
radeon_set_sh_reg(cmd_buffer->cs, R_00B224_SPI_SHADER_PGM_LO_ES, vs->va >> 8);
} else if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, vs->va >> 8);
} else {
radeon_set_sh_reg(cmd_buffer->cs, R_00B210_SPI_SHADER_PGM_LO_ES, vs->va >> 8);
}
unsigned lds_size;
if (next_stage->info.is_ngg) {
lds_size = DIV_ROUND_UP(next_stage->info.ngg_info.lds_size, pdev->info.lds_encode_granularity);
@ -2264,7 +2239,7 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer)
lds_size = next_stage->info.gs_ring_info.lds_size;
}
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
radeon_set_sh_reg_seq(cmd_buffer->cs, vs->info.regs.pgm_rsrc1, 2);
radeon_emit(cmd_buffer->cs, rsrc1);
radeon_emit(cmd_buffer->cs, rsrc2 | S_00B22C_LDS_SIZE(lds_size));
}
@ -2318,13 +2293,7 @@ radv_emit_tess_eval_shader(struct radv_cmd_buffer *cmd_buffer)
radv_shader_combine_cfg_tes_gs(tes, gs, &rsrc1, &rsrc2);
if (pdev->info.gfx_level >= GFX12) {
radeon_set_sh_reg(cmd_buffer->cs, R_00B224_SPI_SHADER_PGM_LO_ES, tes->va >> 8);
} else if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, tes->va >> 8);
} else {
radeon_set_sh_reg(cmd_buffer->cs, R_00B210_SPI_SHADER_PGM_LO_ES, tes->va >> 8);
}
radeon_set_sh_reg(cmd_buffer->cs, tes->info.regs.pgm_lo, tes->va >> 8);
unsigned lds_size;
if (gs->info.is_ngg) {
@ -2333,7 +2302,7 @@ radv_emit_tess_eval_shader(struct radv_cmd_buffer *cmd_buffer)
lds_size = gs->info.gs_ring_info.lds_size;
}
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
radeon_set_sh_reg_seq(cmd_buffer->cs, tes->info.regs.pgm_rsrc1, 2);
radeon_emit(cmd_buffer->cs, rsrc1);
radeon_emit(cmd_buffer->cs, rsrc2 | S_00B22C_LDS_SIZE(lds_size));
@ -2374,14 +2343,9 @@ radv_emit_hw_gs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *gs
if (pdev->info.gfx_level >= GFX9) {
if (!gs->info.merged_shader_compiled_separately) {
radeon_set_sh_reg(cmd_buffer->cs, gs->info.regs.pgm_lo, va >> 8);
if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
} else {
radeon_set_sh_reg(cmd_buffer->cs, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
}
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
radeon_set_sh_reg_seq(cmd_buffer->cs, gs->info.regs.pgm_rsrc1, 2);
radeon_emit(cmd_buffer->cs, gs->config.rsrc1);
radeon_emit(cmd_buffer->cs, gs->config.rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size));
}
@ -2395,7 +2359,7 @@ radv_emit_hw_gs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *gs
gs->info.regs.gs.vgt_gs_max_prims_per_subgroup);
}
} else {
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
radeon_set_sh_reg_seq(cmd_buffer->cs, gs->info.regs.pgm_lo, 4);
radeon_emit(cmd_buffer->cs, va >> 8);
radeon_emit(cmd_buffer->cs, S_00B224_MEM_BASE(va >> 40));
radeon_emit(cmd_buffer->cs, gs->config.rsrc1);
@ -2664,7 +2628,7 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer)
const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT];
const uint64_t va = radv_shader_get_va(ps);
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B020_SPI_SHADER_PGM_LO_PS, 4);
radeon_set_sh_reg_seq(cmd_buffer->cs, ps->info.regs.pgm_lo, 4);
radeon_emit(cmd_buffer->cs, va >> 8);
radeon_emit(cmd_buffer->cs, S_00B024_MEM_BASE(va >> 40));
radeon_emit(cmd_buffer->cs, ps->config.rsrc1);
@ -2976,7 +2940,7 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
const struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
const uint64_t va = reloc->va[MESA_SHADER_TASK];
radeon_set_sh_reg(cmd_buffer->gang.cs, R_00B830_COMPUTE_PGM_LO, va >> 8);
radeon_set_sh_reg(cmd_buffer->gang.cs, task_shader->info.regs.pgm_lo, va >> 8);
}
}
@ -3788,11 +3752,11 @@ radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer)
hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX9(cmd_buffer->state.tess_lds_size);
}
radeon_set_sh_reg(cmd_buffer->cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2);
radeon_set_sh_reg(cmd_buffer->cs, tcs->info.regs.pgm_rsrc2, hs_rsrc2);
} else {
unsigned ls_rsrc2 = vs->config.rsrc2 | S_00B52C_LDS_SIZE(cmd_buffer->state.tess_lds_size);
radeon_set_sh_reg(cmd_buffer->cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
radeon_set_sh_reg(cmd_buffer->cs, vs->info.regs.pgm_rsrc2, ls_rsrc2);
}
/* Emit user SGPRs for dynamic patch control points. */
@ -5272,32 +5236,8 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v
if (G_00B848_VGPRS(prolog->rsrc1) > G_00B848_VGPRS(rsrc1))
rsrc1 = (rsrc1 & C_00B848_VGPRS) | (prolog->rsrc1 & ~C_00B848_VGPRS);
unsigned pgm_lo_reg = R_00B120_SPI_SHADER_PGM_LO_VS;
unsigned rsrc1_reg = R_00B128_SPI_SHADER_PGM_RSRC1_VS;
if (vs_shader->info.is_ngg || cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY] == vs_shader ||
(vs_shader->info.merged_shader_compiled_separately && vs_shader->info.next_stage == MESA_SHADER_GEOMETRY)) {
pgm_lo_reg = chip >= GFX12 ? R_00B224_SPI_SHADER_PGM_LO_ES
: chip >= GFX10 ? R_00B320_SPI_SHADER_PGM_LO_ES
: R_00B210_SPI_SHADER_PGM_LO_ES;
rsrc1_reg = R_00B228_SPI_SHADER_PGM_RSRC1_GS;
} else if (cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL] == vs_shader ||
(vs_shader->info.merged_shader_compiled_separately &&
vs_shader->info.next_stage == MESA_SHADER_TESS_CTRL)) {
pgm_lo_reg = chip >= GFX12 ? R_00B424_SPI_SHADER_PGM_LO_LS
: chip >= GFX10 ? R_00B520_SPI_SHADER_PGM_LO_LS
: R_00B410_SPI_SHADER_PGM_LO_LS;
rsrc1_reg = R_00B428_SPI_SHADER_PGM_RSRC1_HS;
} else if (vs_shader->info.vs.as_ls) {
pgm_lo_reg = R_00B520_SPI_SHADER_PGM_LO_LS;
rsrc1_reg = R_00B528_SPI_SHADER_PGM_RSRC1_LS;
} else if (vs_shader->info.vs.as_es) {
pgm_lo_reg = R_00B320_SPI_SHADER_PGM_LO_ES;
rsrc1_reg = R_00B328_SPI_SHADER_PGM_RSRC1_ES;
}
radeon_set_sh_reg(cmd_buffer->cs, pgm_lo_reg, prolog->va >> 8);
radeon_set_sh_reg(cmd_buffer->cs, rsrc1_reg, rsrc1);
radeon_set_sh_reg(cmd_buffer->cs, vs_shader->info.regs.pgm_lo, prolog->va >> 8);
radeon_set_sh_reg(cmd_buffer->cs, vs_shader->info.regs.pgm_rsrc1, rsrc1);
if (vs_shader->info.merged_shader_compiled_separately) {
if (vs_shader->info.next_stage == MESA_SHADER_GEOMETRY) {
@ -5310,9 +5250,9 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v
lds_size = gs->info.gs_ring_info.lds_size;
}
radeon_set_sh_reg(cmd_buffer->cs, rsrc1_reg + 4, rsrc2 | S_00B22C_LDS_SIZE(lds_size));
radeon_set_sh_reg(cmd_buffer->cs, vs_shader->info.regs.pgm_rsrc2, rsrc2 | S_00B22C_LDS_SIZE(lds_size));
} else {
radeon_set_sh_reg(cmd_buffer->cs, rsrc1_reg + 4, rsrc2);
radeon_set_sh_reg(cmd_buffer->cs, vs_shader->info.regs.pgm_rsrc2, rsrc2);
}
}
@ -12037,12 +11977,13 @@ static void
radv_emit_rt_stack_size(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
unsigned rsrc2 = cmd_buffer->state.rt_prolog->config.rsrc2;
const struct radv_shader *rt_prolog = cmd_buffer->state.rt_prolog;
unsigned rsrc2 = rt_prolog->config.rsrc2;
if (cmd_buffer->state.rt_stack_size)
rsrc2 |= S_00B12C_SCRATCH_EN(1);
radeon_check_space(device->ws, cmd_buffer->cs, 3);
radeon_set_sh_reg(cmd_buffer->cs, R_00B84C_COMPUTE_PGM_RSRC2, rsrc2);
radeon_set_sh_reg(cmd_buffer->cs, rt_prolog->info.regs.pgm_rsrc2, rsrc2);
}
static void

View file

@ -1744,11 +1744,104 @@ radv_precompute_registers_hw_cs(struct radv_device *device, struct radv_shader_b
info->regs.cs.compute_num_thread_z = S_00B824_NUM_THREAD_FULL(info->cs.block_size[2]);
}
static void
radv_precompute_registers_pgm(const struct radv_device *device, struct radv_shader_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
enum ac_hw_stage hw_stage = radv_select_hw_stage(info, gfx_level);
/* Special case for merged shaders compiled separately with ESO on GFX9+. */
if (info->merged_shader_compiled_separately) {
if (info->stage == MESA_SHADER_VERTEX && info->next_stage == MESA_SHADER_TESS_CTRL) {
hw_stage = AC_HW_HULL_SHADER;
} else if ((info->stage == MESA_SHADER_VERTEX || info->stage == MESA_SHADER_TESS_EVAL) &&
info->next_stage == MESA_SHADER_GEOMETRY) {
hw_stage = info->is_ngg ? AC_HW_NEXT_GEN_GEOMETRY_SHADER : AC_HW_LEGACY_GEOMETRY_SHADER;
}
}
switch (hw_stage) {
case AC_HW_NEXT_GEN_GEOMETRY_SHADER:
assert(gfx_level >= GFX10);
if (gfx_level >= GFX12) {
info->regs.pgm_lo = R_00B224_SPI_SHADER_PGM_LO_ES;
} else {
info->regs.pgm_lo = R_00B320_SPI_SHADER_PGM_LO_ES;
}
info->regs.pgm_rsrc1 = R_00B228_SPI_SHADER_PGM_RSRC1_GS;
info->regs.pgm_rsrc2 = R_00B22C_SPI_SHADER_PGM_RSRC2_GS;
break;
case AC_HW_LEGACY_GEOMETRY_SHADER:
assert(gfx_level < GFX11);
if (gfx_level >= GFX10) {
info->regs.pgm_lo = R_00B320_SPI_SHADER_PGM_LO_ES;
} else if (gfx_level >= GFX9) {
info->regs.pgm_lo = R_00B210_SPI_SHADER_PGM_LO_ES;
} else {
info->regs.pgm_lo = R_00B220_SPI_SHADER_PGM_LO_GS;
}
info->regs.pgm_rsrc1 = R_00B228_SPI_SHADER_PGM_RSRC1_GS;
info->regs.pgm_rsrc2 = R_00B22C_SPI_SHADER_PGM_RSRC2_GS;
break;
case AC_HW_EXPORT_SHADER:
assert(gfx_level < GFX9);
info->regs.pgm_lo = R_00B320_SPI_SHADER_PGM_LO_ES;
info->regs.pgm_rsrc1 = R_00B328_SPI_SHADER_PGM_RSRC1_ES;
info->regs.pgm_rsrc2 = R_00B32C_SPI_SHADER_PGM_RSRC2_ES;
break;
case AC_HW_LOCAL_SHADER:
assert(gfx_level < GFX9);
info->regs.pgm_lo = R_00B520_SPI_SHADER_PGM_LO_LS;
info->regs.pgm_rsrc1 = R_00B528_SPI_SHADER_PGM_RSRC1_LS;
info->regs.pgm_rsrc2 = R_00B52C_SPI_SHADER_PGM_RSRC2_LS;
break;
case AC_HW_HULL_SHADER:
if (gfx_level >= GFX12) {
info->regs.pgm_lo = R_00B424_SPI_SHADER_PGM_LO_LS;
} else if (gfx_level >= GFX10) {
info->regs.pgm_lo = R_00B520_SPI_SHADER_PGM_LO_LS;
} else if (gfx_level >= GFX9) {
info->regs.pgm_lo = R_00B410_SPI_SHADER_PGM_LO_LS;
} else {
info->regs.pgm_lo = R_00B420_SPI_SHADER_PGM_LO_HS;
}
info->regs.pgm_rsrc1 = R_00B428_SPI_SHADER_PGM_RSRC1_HS;
info->regs.pgm_rsrc2 = R_00B42C_SPI_SHADER_PGM_RSRC2_HS;
break;
case AC_HW_VERTEX_SHADER:
assert(gfx_level < GFX11);
info->regs.pgm_lo = R_00B120_SPI_SHADER_PGM_LO_VS;
info->regs.pgm_rsrc1 = R_00B128_SPI_SHADER_PGM_RSRC1_VS;
info->regs.pgm_rsrc2 = R_00B12C_SPI_SHADER_PGM_RSRC2_VS;
break;
case AC_HW_PIXEL_SHADER:
info->regs.pgm_lo = R_00B020_SPI_SHADER_PGM_LO_PS;
info->regs.pgm_rsrc1 = R_00B028_SPI_SHADER_PGM_RSRC1_PS;
info->regs.pgm_rsrc2 = R_00B02C_SPI_SHADER_PGM_RSRC2_PS;
break;
case AC_HW_COMPUTE_SHADER:
info->regs.pgm_lo = R_00B830_COMPUTE_PGM_LO;
info->regs.pgm_rsrc1 = R_00B848_COMPUTE_PGM_RSRC1;
info->regs.pgm_rsrc2 = R_00B84C_COMPUTE_PGM_RSRC2;
info->regs.pgm_rsrc3 = R_00B8A0_COMPUTE_PGM_RSRC3;
break;
default:
unreachable("invalid hw stage");
break;
}
}
static void
radv_precompute_registers(struct radv_device *device, struct radv_shader_binary *binary)
{
struct radv_shader_info *info = &binary->info;
radv_precompute_registers_pgm(device, info);
switch (info->stage) {
case MESA_SHADER_VERTEX:
if (!info->vs.as_ls && !info->vs.as_es) {

View file

@ -257,6 +257,11 @@ struct radv_shader_info {
/* Precomputed register values. */
struct {
uint32_t pgm_lo;
uint32_t pgm_rsrc1;
uint32_t pgm_rsrc2;
uint32_t pgm_rsrc3;
struct {
uint32_t spi_shader_late_alloc_vs;
uint32_t spi_shader_pgm_rsrc3_vs;