From 7f7ef10bea7de974e9ccf367bb8bddd7bce0ea2d Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 2 May 2024 10:12:07 +0200 Subject: [PATCH] radv: precompute fragment shader register values Precompute SPI_PS_IN_CONTROL, SPI_SHADER_Z_FORMAT and PA_SC_SHADER_CONTROL in radv_precompute_registers() and store them in the shader info, so that radv_emit_fragment_shader() only emits the stored values. This makes emission faster. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_pipeline_graphics.c | 16 +++------------- src/amd/vulkan/radv_shader.c | 22 ++++++++++++++++++++++ src/amd/vulkan/radv_shader_info.h | 6 ++++++ 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index e258dbf0584..35c65bd48df 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -3478,7 +3478,6 @@ radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf const struct radv_shader *ps) { const struct radv_physical_device *pdev = radv_device_physical(device); - bool param_gen; uint64_t va; va = radv_shader_get_va(ps); @@ -3493,20 +3492,11 @@ radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf radeon_emit(ctx_cs, ps->config.spi_ps_input_ena); radeon_emit(ctx_cs, ps->config.spi_ps_input_addr); - /* Workaround when there are no PS inputs but LDS is used. 
*/ - param_gen = pdev->info.gfx_level >= GFX11 && !ps->info.ps.num_interp && ps->config.lds_size; - - radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL, - S_0286D8_NUM_INTERP(ps->info.ps.num_interp) | - S_0286D8_NUM_PRIM_INTERP(ps->info.ps.num_prim_interp) | - S_0286D8_PS_W32_EN(ps->info.wave_size == 32) | S_0286D8_PARAM_GEN(param_gen)); - - radeon_set_context_reg(ctx_cs, R_028710_SPI_SHADER_Z_FORMAT, - ac_get_spi_shader_z_format(ps->info.ps.writes_z, ps->info.ps.writes_stencil, - ps->info.ps.writes_sample_mask, ps->info.ps.writes_mrt0_alpha)); + radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL, ps->info.regs.ps.spi_ps_in_control); + radeon_set_context_reg(ctx_cs, R_028710_SPI_SHADER_Z_FORMAT, ps->info.regs.ps.spi_shader_z_format); if (pdev->info.gfx_level >= GFX9 && pdev->info.gfx_level < GFX11) - radeon_set_context_reg(ctx_cs, R_028C40_PA_SC_SHADER_CONTROL, S_028C40_LOAD_COLLISION_WAVEID(ps->info.ps.pops)); + radeon_set_context_reg(ctx_cs, R_028C40_PA_SC_SHADER_CONTROL, ps->info.regs.ps.pa_sc_shader_control); } void diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 82357a1c088..af4c5aeddc5 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1476,6 +1476,25 @@ radv_precompute_registers_hw_gs(struct radv_device *device, struct radv_shader_b S_028A44_GS_INST_PRIMS_IN_SUBGRP(info->gs_ring_info.gs_inst_prims_in_subgroup); } +static void +radv_precompute_registers_hw_fs(struct radv_device *device, struct radv_shader_binary *binary) +{ + const struct radv_physical_device *pdev = radv_device_physical(device); + struct radv_shader_info *info = &binary->info; + + const bool param_gen = pdev->info.gfx_level >= GFX11 && !info->ps.num_interp && binary->config.lds_size; + + info->regs.ps.spi_ps_in_control = S_0286D8_NUM_INTERP(info->ps.num_interp) | + S_0286D8_NUM_PRIM_INTERP(info->ps.num_prim_interp) | + S_0286D8_PS_W32_EN(info->wave_size == 32) | S_0286D8_PARAM_GEN(param_gen); + + 
info->regs.ps.spi_shader_z_format = ac_get_spi_shader_z_format( + info->ps.writes_z, info->ps.writes_stencil, info->ps.writes_sample_mask, info->ps.writes_mrt0_alpha); + + if (pdev->info.gfx_level >= GFX9 && pdev->info.gfx_level < GFX11) + info->regs.ps.pa_sc_shader_control = S_028C40_LOAD_COLLISION_WAVEID(info->ps.pops); +} + static void radv_precompute_registers_hw_cs(struct radv_device *device, struct radv_shader_binary *binary) { @@ -1498,6 +1517,9 @@ radv_precompute_registers(struct radv_device *device, struct radv_shader_binary if (!info->is_ngg) radv_precompute_registers_hw_gs(device, binary); break; + case MESA_SHADER_FRAGMENT: + radv_precompute_registers_hw_fs(device, binary); + break; case MESA_SHADER_COMPUTE: case MESA_SHADER_TASK: radv_precompute_registers_hw_cs(device, binary); diff --git a/src/amd/vulkan/radv_shader_info.h b/src/amd/vulkan/radv_shader_info.h index dd0c1ef881f..72e8b93b2c3 100644 --- a/src/amd/vulkan/radv_shader_info.h +++ b/src/amd/vulkan/radv_shader_info.h @@ -259,6 +259,12 @@ struct radv_shader_info { uint32_t vgt_gs_onchip_cntl; } gs; + struct { + uint32_t pa_sc_shader_control; + uint32_t spi_ps_in_control; + uint32_t spi_shader_z_format; + } ps; + struct { uint32_t compute_num_thread_x; uint32_t compute_num_thread_y;