radv: precompute fragment shader register values

To make emission faster.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29022>
This commit is contained in:
Samuel Pitoiset 2024-05-02 10:12:07 +02:00 committed by Marge Bot
parent e5bc4d85bb
commit 7f7ef10bea
3 changed files with 31 additions and 13 deletions

View file

@ -3478,7 +3478,6 @@ radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf
const struct radv_shader *ps)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
bool param_gen;
uint64_t va;
va = radv_shader_get_va(ps);
@ -3493,20 +3492,11 @@ radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf
radeon_emit(ctx_cs, ps->config.spi_ps_input_ena);
radeon_emit(ctx_cs, ps->config.spi_ps_input_addr);
/* Workaround when there are no PS inputs but LDS is used. */
param_gen = pdev->info.gfx_level >= GFX11 && !ps->info.ps.num_interp && ps->config.lds_size;
radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL,
S_0286D8_NUM_INTERP(ps->info.ps.num_interp) |
S_0286D8_NUM_PRIM_INTERP(ps->info.ps.num_prim_interp) |
S_0286D8_PS_W32_EN(ps->info.wave_size == 32) | S_0286D8_PARAM_GEN(param_gen));
radeon_set_context_reg(ctx_cs, R_028710_SPI_SHADER_Z_FORMAT,
ac_get_spi_shader_z_format(ps->info.ps.writes_z, ps->info.ps.writes_stencil,
ps->info.ps.writes_sample_mask, ps->info.ps.writes_mrt0_alpha));
radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL, ps->info.regs.ps.spi_ps_in_control);
radeon_set_context_reg(ctx_cs, R_028710_SPI_SHADER_Z_FORMAT, ps->info.regs.ps.spi_shader_z_format);
if (pdev->info.gfx_level >= GFX9 && pdev->info.gfx_level < GFX11)
radeon_set_context_reg(ctx_cs, R_028C40_PA_SC_SHADER_CONTROL, S_028C40_LOAD_COLLISION_WAVEID(ps->info.ps.pops));
radeon_set_context_reg(ctx_cs, R_028C40_PA_SC_SHADER_CONTROL, ps->info.regs.ps.pa_sc_shader_control);
}
void

View file

@ -1476,6 +1476,25 @@ radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_b
S_028A44_GS_INST_PRIMS_IN_SUBGRP(info->gs_ring_info.gs_inst_prims_in_subgroup);
}
/**
 * Precompute the fragment shader register values and store them in the
 * binary's shader info (info->regs.ps) so that command-buffer emission can
 * write them out without recomputing anything.
 *
 * Always written:
 *  - regs.ps.spi_ps_in_control
 *  - regs.ps.spi_shader_z_format
 *
 * Written only when GFX9 <= gfx_level < GFX11 (left untouched otherwise):
 *  - regs.ps.pa_sc_shader_control
 *
 * \param device  Device used to look up the physical device / GFX level.
 * \param binary  Shader binary whose info and config are read and whose
 *                info->regs.ps fields are filled in.
 */
static void
radv_precompute_registers_hw_fs(struct radv_device *device, struct radv_shader_binary *binary)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   struct radv_shader_info *info = &binary->info;

   /* Workaround when there are no PS inputs but LDS is used. */
   const bool param_gen =
      pdev->info.gfx_level >= GFX11 && info->ps.num_interp == 0 && binary->config.lds_size != 0;

   /* Assemble SPI_PS_IN_CONTROL one field at a time. */
   uint32_t ps_in_control = S_0286D8_NUM_INTERP(info->ps.num_interp);
   ps_in_control |= S_0286D8_NUM_PRIM_INTERP(info->ps.num_prim_interp);
   ps_in_control |= S_0286D8_PS_W32_EN(info->wave_size == 32);
   ps_in_control |= S_0286D8_PARAM_GEN(param_gen);
   info->regs.ps.spi_ps_in_control = ps_in_control;

   info->regs.ps.spi_shader_z_format =
      ac_get_spi_shader_z_format(info->ps.writes_z, info->ps.writes_stencil, info->ps.writes_sample_mask,
                                 info->ps.writes_mrt0_alpha);

   /* PA_SC_SHADER_CONTROL is only computed for GFX9 up to (excluding) GFX11. */
   if (pdev->info.gfx_level < GFX9 || pdev->info.gfx_level >= GFX11)
      return;

   info->regs.ps.pa_sc_shader_control = S_028C40_LOAD_COLLISION_WAVEID(info->ps.pops);
}
static void
radv_precompute_registers_hw_cs(struct radv_device *device, struct radv_shader_binary *binary)
{
@ -1498,6 +1517,9 @@ radv_precompute_registers(struct radv_device *device, struct radv_shader_binary
if (!info->is_ngg)
radv_precompute_registers_hw_gs(device, binary);
break;
case MESA_SHADER_FRAGMENT:
radv_precompute_registers_hw_fs(device, binary);
break;
case MESA_SHADER_COMPUTE:
case MESA_SHADER_TASK:
radv_precompute_registers_hw_cs(device, binary);

View file

@ -259,6 +259,12 @@ struct radv_shader_info {
uint32_t vgt_gs_onchip_cntl;
} gs;
/* Precomputed fragment shader (PS) register values. */
struct {
uint32_t pa_sc_shader_control; /* Value for R_028C40_PA_SC_SHADER_CONTROL. */
uint32_t spi_ps_in_control; /* Value for R_0286D8_SPI_PS_IN_CONTROL. */
uint32_t spi_shader_z_format; /* Value for R_028710_SPI_SHADER_Z_FORMAT. */
} ps;
struct {
uint32_t compute_num_thread_x;
uint32_t compute_num_thread_y;