mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 09:48:07 +02:00
radv: precompute more PGM registers for all stages
Less error prone and easier to update for new hardware. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31115>
This commit is contained in:
parent
c7a509f55c
commit
190d46b65d
4 changed files with 151 additions and 128 deletions
|
|
@ -35,15 +35,15 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv
|
|||
|
||||
va = reloc->va[MESA_SHADER_VERTEX];
|
||||
if (vs->info.vs.as_ls) {
|
||||
radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
radeon_set_sh_reg(cs, vs->info.regs.pgm_lo, va >> 8);
|
||||
} else if (vs->info.vs.as_es) {
|
||||
radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
|
||||
radeon_set_sh_reg_seq(cs, vs->info.regs.pgm_lo, 2);
|
||||
radeon_emit(cs, va >> 8);
|
||||
radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
|
||||
} else if (vs->info.is_ngg) {
|
||||
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
radeon_set_sh_reg(cs, vs->info.regs.pgm_lo, va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 2);
|
||||
radeon_set_sh_reg_seq(cs, vs->info.regs.pgm_lo, 2);
|
||||
radeon_emit(cs, va >> 8);
|
||||
radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
|
||||
}
|
||||
|
|
@ -51,18 +51,14 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv
|
|||
|
||||
/* TCS */
|
||||
if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]) {
|
||||
const struct radv_shader *tcs = pipeline->base.shaders[MESA_SHADER_TESS_CTRL];
|
||||
|
||||
va = reloc->va[MESA_SHADER_TESS_CTRL];
|
||||
|
||||
if (gfx_level >= GFX9) {
|
||||
if (gfx_level >= GFX12) {
|
||||
radeon_set_sh_reg(cs, R_00B424_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
} else if (gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
}
|
||||
radeon_set_sh_reg(cs, tcs->info.regs.pgm_lo, va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(cs, R_00B420_SPI_SHADER_PGM_LO_HS, 2);
|
||||
radeon_set_sh_reg_seq(cs, tcs->info.regs.pgm_lo, 2);
|
||||
radeon_emit(cs, va >> 8);
|
||||
radeon_emit(cs, S_00B424_MEM_BASE(va >> 40));
|
||||
}
|
||||
|
|
@ -74,17 +70,13 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv
|
|||
|
||||
va = reloc->va[MESA_SHADER_TESS_EVAL];
|
||||
if (tes->info.is_ngg) {
|
||||
if (gfx_level >= GFX12) {
|
||||
radeon_set_sh_reg(cs, R_00B224_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
}
|
||||
radeon_set_sh_reg(cs, tes->info.regs.pgm_lo, va >> 8);
|
||||
} else if (tes->info.tes.as_es) {
|
||||
radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
|
||||
radeon_set_sh_reg_seq(cs, tes->info.regs.pgm_lo, 2);
|
||||
radeon_emit(cs, va >> 8);
|
||||
radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 2);
|
||||
radeon_set_sh_reg_seq(cs, tes->info.regs.pgm_lo, 2);
|
||||
radeon_emit(cs, va >> 8);
|
||||
radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
|
||||
}
|
||||
|
|
@ -96,20 +88,12 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv
|
|||
|
||||
va = reloc->va[MESA_SHADER_GEOMETRY];
|
||||
if (gs->info.is_ngg) {
|
||||
if (gfx_level >= GFX12) {
|
||||
radeon_set_sh_reg(cs, R_00B224_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
}
|
||||
radeon_set_sh_reg(cs, gs->info.regs.pgm_lo, va >> 8);
|
||||
} else {
|
||||
if (gfx_level >= GFX9) {
|
||||
if (gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
}
|
||||
radeon_set_sh_reg(cs, gs->info.regs.pgm_lo, va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 2);
|
||||
radeon_set_sh_reg_seq(cs, gs->info.regs.pgm_lo, 2);
|
||||
radeon_emit(cs, va >> 8);
|
||||
radeon_emit(cs, S_00B224_MEM_BASE(va >> 40));
|
||||
}
|
||||
|
|
@ -118,22 +102,22 @@ radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv
|
|||
|
||||
/* FS */
|
||||
if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]) {
|
||||
const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
|
||||
|
||||
va = reloc->va[MESA_SHADER_FRAGMENT];
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B020_SPI_SHADER_PGM_LO_PS, 2);
|
||||
radeon_set_sh_reg_seq(cs, ps->info.regs.pgm_lo, 2);
|
||||
radeon_emit(cs, va >> 8);
|
||||
radeon_emit(cs, S_00B024_MEM_BASE(va >> 40));
|
||||
}
|
||||
|
||||
/* MS */
|
||||
if (pipeline->base.shaders[MESA_SHADER_MESH]) {
|
||||
const struct radv_shader *ms = pipeline->base.shaders[MESA_SHADER_MESH];
|
||||
|
||||
va = reloc->va[MESA_SHADER_MESH];
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
radeon_set_sh_reg(cs, R_00B224_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
}
|
||||
radeon_set_sh_reg(cs, ms->info.regs.pgm_lo, va >> 8);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1957,7 +1957,7 @@ radv_emit_ps_epilog_state(struct radv_cmd_buffer *cmd_buffer, struct radv_shader
|
|||
if (G_00B848_VGPRS(ps_epilog->rsrc1) > G_00B848_VGPRS(ps_shader->config.rsrc1)) {
|
||||
uint32_t rsrc1 = ps_shader->config.rsrc1;
|
||||
rsrc1 = (rsrc1 & C_00B848_VGPRS) | (ps_epilog->rsrc1 & ~C_00B848_VGPRS);
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B028_SPI_SHADER_PGM_RSRC1_PS, rsrc1);
|
||||
radeon_set_sh_reg(cmd_buffer->cs, ps_shader->info.regs.pgm_rsrc1, rsrc1);
|
||||
}
|
||||
|
||||
radv_emit_epilog(cmd_buffer, ps_shader, ps_epilog);
|
||||
|
|
@ -1971,13 +1971,13 @@ radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_
|
|||
{
|
||||
uint64_t va = radv_shader_get_va(shader);
|
||||
|
||||
radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, va >> 8);
|
||||
radeon_set_sh_reg(cs, shader->info.regs.pgm_lo, va >> 8);
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
|
||||
radeon_set_sh_reg_seq(cs, shader->info.regs.pgm_rsrc1, 2);
|
||||
radeon_emit(cs, shader->config.rsrc1);
|
||||
radeon_emit(cs, shader->config.rsrc2);
|
||||
if (pdev->info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3);
|
||||
radeon_set_sh_reg(cs, shader->info.regs.pgm_rsrc3, shader->config.rsrc3);
|
||||
}
|
||||
|
||||
radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, shader->info.regs.cs.compute_resource_limits);
|
||||
|
|
@ -2018,7 +2018,7 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
|
|||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const uint64_t va = radv_shader_get_va(shader);
|
||||
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4);
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, shader->info.regs.pgm_lo, 4);
|
||||
radeon_emit(cmd_buffer->cs, va >> 8);
|
||||
radeon_emit(cmd_buffer->cs, S_00B124_MEM_BASE(va >> 40));
|
||||
radeon_emit(cmd_buffer->cs, shader->config.rsrc1);
|
||||
|
|
@ -2057,7 +2057,7 @@ radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
|
|||
{
|
||||
const uint64_t va = radv_shader_get_va(shader);
|
||||
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4);
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, shader->info.regs.pgm_lo, 4);
|
||||
radeon_emit(cmd_buffer->cs, va >> 8);
|
||||
radeon_emit(cmd_buffer->cs, S_00B324_MEM_BASE(va >> 40));
|
||||
radeon_emit(cmd_buffer->cs, shader->config.rsrc1);
|
||||
|
|
@ -2069,9 +2069,9 @@ radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
|
|||
{
|
||||
const uint64_t va = radv_shader_get_va(shader);
|
||||
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
radeon_set_sh_reg(cmd_buffer->cs, shader->info.regs.pgm_lo, va >> 8);
|
||||
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, shader->config.rsrc1);
|
||||
radeon_set_sh_reg(cmd_buffer->cs, shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -2094,13 +2094,9 @@ radv_emit_hw_ngg(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *e
|
|||
}
|
||||
|
||||
if (!shader->info.merged_shader_compiled_separately) {
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B224_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
}
|
||||
radeon_set_sh_reg(cmd_buffer->cs, shader->info.regs.pgm_lo, va >> 8);
|
||||
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, shader->info.regs.pgm_rsrc1, 2);
|
||||
radeon_emit(cmd_buffer->cs, shader->config.rsrc1);
|
||||
radeon_emit(cmd_buffer->cs, shader->config.rsrc2);
|
||||
}
|
||||
|
|
@ -2197,17 +2193,10 @@ radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *sh
|
|||
const uint64_t va = radv_shader_get_va(shader);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX9) {
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B424_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
} else if (pdev->info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
}
|
||||
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, shader->config.rsrc1);
|
||||
radeon_set_sh_reg(cmd_buffer->cs, shader->info.regs.pgm_lo, va >> 8);
|
||||
radeon_set_sh_reg(cmd_buffer->cs, shader->info.regs.pgm_rsrc1, shader->config.rsrc1);
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4);
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, shader->info.regs.pgm_lo, 4);
|
||||
radeon_emit(cmd_buffer->cs, va >> 8);
|
||||
radeon_emit(cmd_buffer->cs, S_00B424_MEM_BASE(va >> 40));
|
||||
radeon_emit(cmd_buffer->cs, shader->config.rsrc1);
|
||||
|
|
@ -2234,29 +2223,15 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer)
|
|||
if (!vs->info.vs.has_prolog) {
|
||||
uint32_t rsrc1, rsrc2;
|
||||
|
||||
radeon_set_sh_reg(cmd_buffer->cs, vs->info.regs.pgm_lo, vs->va >> 8);
|
||||
|
||||
if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) {
|
||||
radv_shader_combine_cfg_vs_tcs(vs, next_stage, &rsrc1, NULL);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B424_SPI_SHADER_PGM_LO_LS, vs->va >> 8);
|
||||
} else if (pdev->info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B520_SPI_SHADER_PGM_LO_LS, vs->va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B410_SPI_SHADER_PGM_LO_LS, vs->va >> 8);
|
||||
}
|
||||
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, rsrc1);
|
||||
radeon_set_sh_reg(cmd_buffer->cs, vs->info.regs.pgm_rsrc1, rsrc1);
|
||||
} else {
|
||||
radv_shader_combine_cfg_vs_gs(vs, next_stage, &rsrc1, &rsrc2);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B224_SPI_SHADER_PGM_LO_ES, vs->va >> 8);
|
||||
} else if (pdev->info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, vs->va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B210_SPI_SHADER_PGM_LO_ES, vs->va >> 8);
|
||||
}
|
||||
|
||||
unsigned lds_size;
|
||||
if (next_stage->info.is_ngg) {
|
||||
lds_size = DIV_ROUND_UP(next_stage->info.ngg_info.lds_size, pdev->info.lds_encode_granularity);
|
||||
|
|
@ -2264,7 +2239,7 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer)
|
|||
lds_size = next_stage->info.gs_ring_info.lds_size;
|
||||
}
|
||||
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, vs->info.regs.pgm_rsrc1, 2);
|
||||
radeon_emit(cmd_buffer->cs, rsrc1);
|
||||
radeon_emit(cmd_buffer->cs, rsrc2 | S_00B22C_LDS_SIZE(lds_size));
|
||||
}
|
||||
|
|
@ -2318,13 +2293,7 @@ radv_emit_tess_eval_shader(struct radv_cmd_buffer *cmd_buffer)
|
|||
|
||||
radv_shader_combine_cfg_tes_gs(tes, gs, &rsrc1, &rsrc2);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B224_SPI_SHADER_PGM_LO_ES, tes->va >> 8);
|
||||
} else if (pdev->info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, tes->va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B210_SPI_SHADER_PGM_LO_ES, tes->va >> 8);
|
||||
}
|
||||
radeon_set_sh_reg(cmd_buffer->cs, tes->info.regs.pgm_lo, tes->va >> 8);
|
||||
|
||||
unsigned lds_size;
|
||||
if (gs->info.is_ngg) {
|
||||
|
|
@ -2333,7 +2302,7 @@ radv_emit_tess_eval_shader(struct radv_cmd_buffer *cmd_buffer)
|
|||
lds_size = gs->info.gs_ring_info.lds_size;
|
||||
}
|
||||
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, tes->info.regs.pgm_rsrc1, 2);
|
||||
radeon_emit(cmd_buffer->cs, rsrc1);
|
||||
radeon_emit(cmd_buffer->cs, rsrc2 | S_00B22C_LDS_SIZE(lds_size));
|
||||
|
||||
|
|
@ -2374,14 +2343,9 @@ radv_emit_hw_gs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *gs
|
|||
|
||||
if (pdev->info.gfx_level >= GFX9) {
|
||||
if (!gs->info.merged_shader_compiled_separately) {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, gs->info.regs.pgm_lo, va >> 8);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
} else {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
|
||||
}
|
||||
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, gs->info.regs.pgm_rsrc1, 2);
|
||||
radeon_emit(cmd_buffer->cs, gs->config.rsrc1);
|
||||
radeon_emit(cmd_buffer->cs, gs->config.rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size));
|
||||
}
|
||||
|
|
@ -2395,7 +2359,7 @@ radv_emit_hw_gs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *gs
|
|||
gs->info.regs.gs.vgt_gs_max_prims_per_subgroup);
|
||||
}
|
||||
} else {
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, gs->info.regs.pgm_lo, 4);
|
||||
radeon_emit(cmd_buffer->cs, va >> 8);
|
||||
radeon_emit(cmd_buffer->cs, S_00B224_MEM_BASE(va >> 40));
|
||||
radeon_emit(cmd_buffer->cs, gs->config.rsrc1);
|
||||
|
|
@ -2664,7 +2628,7 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer)
|
|||
const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT];
|
||||
const uint64_t va = radv_shader_get_va(ps);
|
||||
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B020_SPI_SHADER_PGM_LO_PS, 4);
|
||||
radeon_set_sh_reg_seq(cmd_buffer->cs, ps->info.regs.pgm_lo, 4);
|
||||
radeon_emit(cmd_buffer->cs, va >> 8);
|
||||
radeon_emit(cmd_buffer->cs, S_00B024_MEM_BASE(va >> 40));
|
||||
radeon_emit(cmd_buffer->cs, ps->config.rsrc1);
|
||||
|
|
@ -2976,7 +2940,7 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
|
|||
const struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
|
||||
const uint64_t va = reloc->va[MESA_SHADER_TASK];
|
||||
|
||||
radeon_set_sh_reg(cmd_buffer->gang.cs, R_00B830_COMPUTE_PGM_LO, va >> 8);
|
||||
radeon_set_sh_reg(cmd_buffer->gang.cs, task_shader->info.regs.pgm_lo, va >> 8);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3788,11 +3752,11 @@ radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer)
|
|||
hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX9(cmd_buffer->state.tess_lds_size);
|
||||
}
|
||||
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, hs_rsrc2);
|
||||
radeon_set_sh_reg(cmd_buffer->cs, tcs->info.regs.pgm_rsrc2, hs_rsrc2);
|
||||
} else {
|
||||
unsigned ls_rsrc2 = vs->config.rsrc2 | S_00B52C_LDS_SIZE(cmd_buffer->state.tess_lds_size);
|
||||
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
|
||||
radeon_set_sh_reg(cmd_buffer->cs, vs->info.regs.pgm_rsrc2, ls_rsrc2);
|
||||
}
|
||||
|
||||
/* Emit user SGPRs for dynamic patch control points. */
|
||||
|
|
@ -5272,32 +5236,8 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v
|
|||
if (G_00B848_VGPRS(prolog->rsrc1) > G_00B848_VGPRS(rsrc1))
|
||||
rsrc1 = (rsrc1 & C_00B848_VGPRS) | (prolog->rsrc1 & ~C_00B848_VGPRS);
|
||||
|
||||
unsigned pgm_lo_reg = R_00B120_SPI_SHADER_PGM_LO_VS;
|
||||
unsigned rsrc1_reg = R_00B128_SPI_SHADER_PGM_RSRC1_VS;
|
||||
if (vs_shader->info.is_ngg || cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY] == vs_shader ||
|
||||
(vs_shader->info.merged_shader_compiled_separately && vs_shader->info.next_stage == MESA_SHADER_GEOMETRY)) {
|
||||
pgm_lo_reg = chip >= GFX12 ? R_00B224_SPI_SHADER_PGM_LO_ES
|
||||
: chip >= GFX10 ? R_00B320_SPI_SHADER_PGM_LO_ES
|
||||
: R_00B210_SPI_SHADER_PGM_LO_ES;
|
||||
rsrc1_reg = R_00B228_SPI_SHADER_PGM_RSRC1_GS;
|
||||
} else if (cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL] == vs_shader ||
|
||||
(vs_shader->info.merged_shader_compiled_separately &&
|
||||
vs_shader->info.next_stage == MESA_SHADER_TESS_CTRL)) {
|
||||
pgm_lo_reg = chip >= GFX12 ? R_00B424_SPI_SHADER_PGM_LO_LS
|
||||
: chip >= GFX10 ? R_00B520_SPI_SHADER_PGM_LO_LS
|
||||
: R_00B410_SPI_SHADER_PGM_LO_LS;
|
||||
rsrc1_reg = R_00B428_SPI_SHADER_PGM_RSRC1_HS;
|
||||
} else if (vs_shader->info.vs.as_ls) {
|
||||
pgm_lo_reg = R_00B520_SPI_SHADER_PGM_LO_LS;
|
||||
rsrc1_reg = R_00B528_SPI_SHADER_PGM_RSRC1_LS;
|
||||
} else if (vs_shader->info.vs.as_es) {
|
||||
pgm_lo_reg = R_00B320_SPI_SHADER_PGM_LO_ES;
|
||||
rsrc1_reg = R_00B328_SPI_SHADER_PGM_RSRC1_ES;
|
||||
}
|
||||
|
||||
radeon_set_sh_reg(cmd_buffer->cs, pgm_lo_reg, prolog->va >> 8);
|
||||
|
||||
radeon_set_sh_reg(cmd_buffer->cs, rsrc1_reg, rsrc1);
|
||||
radeon_set_sh_reg(cmd_buffer->cs, vs_shader->info.regs.pgm_lo, prolog->va >> 8);
|
||||
radeon_set_sh_reg(cmd_buffer->cs, vs_shader->info.regs.pgm_rsrc1, rsrc1);
|
||||
|
||||
if (vs_shader->info.merged_shader_compiled_separately) {
|
||||
if (vs_shader->info.next_stage == MESA_SHADER_GEOMETRY) {
|
||||
|
|
@ -5310,9 +5250,9 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v
|
|||
lds_size = gs->info.gs_ring_info.lds_size;
|
||||
}
|
||||
|
||||
radeon_set_sh_reg(cmd_buffer->cs, rsrc1_reg + 4, rsrc2 | S_00B22C_LDS_SIZE(lds_size));
|
||||
radeon_set_sh_reg(cmd_buffer->cs, vs_shader->info.regs.pgm_rsrc2, rsrc2 | S_00B22C_LDS_SIZE(lds_size));
|
||||
} else {
|
||||
radeon_set_sh_reg(cmd_buffer->cs, rsrc1_reg + 4, rsrc2);
|
||||
radeon_set_sh_reg(cmd_buffer->cs, vs_shader->info.regs.pgm_rsrc2, rsrc2);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -12037,12 +11977,13 @@ static void
|
|||
radv_emit_rt_stack_size(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
unsigned rsrc2 = cmd_buffer->state.rt_prolog->config.rsrc2;
|
||||
const struct radv_shader *rt_prolog = cmd_buffer->state.rt_prolog;
|
||||
unsigned rsrc2 = rt_prolog->config.rsrc2;
|
||||
if (cmd_buffer->state.rt_stack_size)
|
||||
rsrc2 |= S_00B12C_SCRATCH_EN(1);
|
||||
|
||||
radeon_check_space(device->ws, cmd_buffer->cs, 3);
|
||||
radeon_set_sh_reg(cmd_buffer->cs, R_00B84C_COMPUTE_PGM_RSRC2, rsrc2);
|
||||
radeon_set_sh_reg(cmd_buffer->cs, rt_prolog->info.regs.pgm_rsrc2, rsrc2);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -1744,11 +1744,104 @@ radv_precompute_registers_hw_cs(struct radv_device *device, struct radv_shader_b
|
|||
info->regs.cs.compute_num_thread_z = S_00B824_NUM_THREAD_FULL(info->cs.block_size[2]);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_precompute_registers_pgm(const struct radv_device *device, struct radv_shader_info *info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
|
||||
enum ac_hw_stage hw_stage = radv_select_hw_stage(info, gfx_level);
|
||||
|
||||
/* Special case for merged shaders compiled separately with ESO on GFX9+. */
|
||||
if (info->merged_shader_compiled_separately) {
|
||||
if (info->stage == MESA_SHADER_VERTEX && info->next_stage == MESA_SHADER_TESS_CTRL) {
|
||||
hw_stage = AC_HW_HULL_SHADER;
|
||||
} else if ((info->stage == MESA_SHADER_VERTEX || info->stage == MESA_SHADER_TESS_EVAL) &&
|
||||
info->next_stage == MESA_SHADER_GEOMETRY) {
|
||||
hw_stage = info->is_ngg ? AC_HW_NEXT_GEN_GEOMETRY_SHADER : AC_HW_LEGACY_GEOMETRY_SHADER;
|
||||
}
|
||||
}
|
||||
|
||||
switch (hw_stage) {
|
||||
case AC_HW_NEXT_GEN_GEOMETRY_SHADER:
|
||||
assert(gfx_level >= GFX10);
|
||||
if (gfx_level >= GFX12) {
|
||||
info->regs.pgm_lo = R_00B224_SPI_SHADER_PGM_LO_ES;
|
||||
} else {
|
||||
info->regs.pgm_lo = R_00B320_SPI_SHADER_PGM_LO_ES;
|
||||
}
|
||||
|
||||
info->regs.pgm_rsrc1 = R_00B228_SPI_SHADER_PGM_RSRC1_GS;
|
||||
info->regs.pgm_rsrc2 = R_00B22C_SPI_SHADER_PGM_RSRC2_GS;
|
||||
break;
|
||||
case AC_HW_LEGACY_GEOMETRY_SHADER:
|
||||
assert(gfx_level < GFX11);
|
||||
if (gfx_level >= GFX10) {
|
||||
info->regs.pgm_lo = R_00B320_SPI_SHADER_PGM_LO_ES;
|
||||
} else if (gfx_level >= GFX9) {
|
||||
info->regs.pgm_lo = R_00B210_SPI_SHADER_PGM_LO_ES;
|
||||
} else {
|
||||
info->regs.pgm_lo = R_00B220_SPI_SHADER_PGM_LO_GS;
|
||||
}
|
||||
|
||||
info->regs.pgm_rsrc1 = R_00B228_SPI_SHADER_PGM_RSRC1_GS;
|
||||
info->regs.pgm_rsrc2 = R_00B22C_SPI_SHADER_PGM_RSRC2_GS;
|
||||
break;
|
||||
case AC_HW_EXPORT_SHADER:
|
||||
assert(gfx_level < GFX9);
|
||||
info->regs.pgm_lo = R_00B320_SPI_SHADER_PGM_LO_ES;
|
||||
info->regs.pgm_rsrc1 = R_00B328_SPI_SHADER_PGM_RSRC1_ES;
|
||||
info->regs.pgm_rsrc2 = R_00B32C_SPI_SHADER_PGM_RSRC2_ES;
|
||||
break;
|
||||
case AC_HW_LOCAL_SHADER:
|
||||
assert(gfx_level < GFX9);
|
||||
info->regs.pgm_lo = R_00B520_SPI_SHADER_PGM_LO_LS;
|
||||
info->regs.pgm_rsrc1 = R_00B528_SPI_SHADER_PGM_RSRC1_LS;
|
||||
info->regs.pgm_rsrc2 = R_00B52C_SPI_SHADER_PGM_RSRC2_LS;
|
||||
break;
|
||||
case AC_HW_HULL_SHADER:
|
||||
if (gfx_level >= GFX12) {
|
||||
info->regs.pgm_lo = R_00B424_SPI_SHADER_PGM_LO_LS;
|
||||
} else if (gfx_level >= GFX10) {
|
||||
info->regs.pgm_lo = R_00B520_SPI_SHADER_PGM_LO_LS;
|
||||
} else if (gfx_level >= GFX9) {
|
||||
info->regs.pgm_lo = R_00B410_SPI_SHADER_PGM_LO_LS;
|
||||
} else {
|
||||
info->regs.pgm_lo = R_00B420_SPI_SHADER_PGM_LO_HS;
|
||||
}
|
||||
|
||||
info->regs.pgm_rsrc1 = R_00B428_SPI_SHADER_PGM_RSRC1_HS;
|
||||
info->regs.pgm_rsrc2 = R_00B42C_SPI_SHADER_PGM_RSRC2_HS;
|
||||
break;
|
||||
case AC_HW_VERTEX_SHADER:
|
||||
assert(gfx_level < GFX11);
|
||||
info->regs.pgm_lo = R_00B120_SPI_SHADER_PGM_LO_VS;
|
||||
info->regs.pgm_rsrc1 = R_00B128_SPI_SHADER_PGM_RSRC1_VS;
|
||||
info->regs.pgm_rsrc2 = R_00B12C_SPI_SHADER_PGM_RSRC2_VS;
|
||||
break;
|
||||
case AC_HW_PIXEL_SHADER:
|
||||
info->regs.pgm_lo = R_00B020_SPI_SHADER_PGM_LO_PS;
|
||||
info->regs.pgm_rsrc1 = R_00B028_SPI_SHADER_PGM_RSRC1_PS;
|
||||
info->regs.pgm_rsrc2 = R_00B02C_SPI_SHADER_PGM_RSRC2_PS;
|
||||
break;
|
||||
case AC_HW_COMPUTE_SHADER:
|
||||
info->regs.pgm_lo = R_00B830_COMPUTE_PGM_LO;
|
||||
info->regs.pgm_rsrc1 = R_00B848_COMPUTE_PGM_RSRC1;
|
||||
info->regs.pgm_rsrc2 = R_00B84C_COMPUTE_PGM_RSRC2;
|
||||
info->regs.pgm_rsrc3 = R_00B8A0_COMPUTE_PGM_RSRC3;
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid hw stage");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
radv_precompute_registers(struct radv_device *device, struct radv_shader_binary *binary)
|
||||
{
|
||||
struct radv_shader_info *info = &binary->info;
|
||||
|
||||
radv_precompute_registers_pgm(device, info);
|
||||
|
||||
switch (info->stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
if (!info->vs.as_ls && !info->vs.as_es) {
|
||||
|
|
|
|||
|
|
@ -257,6 +257,11 @@ struct radv_shader_info {
|
|||
|
||||
/* Precomputed register values. */
|
||||
struct {
|
||||
uint32_t pgm_lo;
|
||||
uint32_t pgm_rsrc1;
|
||||
uint32_t pgm_rsrc2;
|
||||
uint32_t pgm_rsrc3;
|
||||
|
||||
struct {
|
||||
uint32_t spi_shader_late_alloc_vs;
|
||||
uint32_t spi_shader_pgm_rsrc3_vs;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue