radv, aco: Consolidate num_interp + num_prim_interp into num_inputs.

num_inputs contains the total number of FS inputs.

Note that this also fixes a bug where some calculations in RADV
and ACO were missing the per-primitive attributes from the LDS
usage of PS.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32220>
This commit is contained in:
Timur Kristóf 2024-11-26 15:56:02 +01:00 committed by Marge Bot
parent e5a9ae912b
commit e2b8c4a9ac
8 changed files with 21 additions and 19 deletions

View file

@ -473,7 +473,7 @@ max_suitable_waves(Program* program, uint16_t waves)
* These limit occupancy the same way as other stages' LDS usage does.
*/
unsigned lds_bytes_per_interp = 3 * 16;
unsigned lds_param_bytes = lds_bytes_per_interp * program->info.ps.num_interp;
unsigned lds_param_bytes = lds_bytes_per_interp * program->info.ps.num_inputs;
lds_per_workgroup += align(lds_param_bytes, program->dev.lds_alloc_granule);
}
unsigned lds_limit = program->wgp_mode ? program->dev.lds_limit * 2 : program->dev.lds_limit;

View file

@ -117,7 +117,7 @@ struct aco_shader_info {
uint32_t num_lds_blocks;
} tcs;
struct {
uint32_t num_interp;
uint32_t num_inputs;
unsigned spi_ps_input_ena;
unsigned spi_ps_input_addr;
bool has_prolog;

View file

@ -37,7 +37,7 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv
ASSIGN_FIELD(vs.tcs_temp_only_input_mask);
ASSIGN_FIELD(vs.has_prolog);
ASSIGN_FIELD(tcs.num_lds_blocks);
ASSIGN_FIELD(ps.num_interp);
ASSIGN_FIELD(ps.num_inputs);
ASSIGN_FIELD(cs.uses_full_subgroups);
aco_info->ps.spi_ps_input_ena = radv->ps.spi_ps_input_ena;
aco_info->ps.spi_ps_input_addr = radv->ps.spi_ps_input_addr;

View file

@ -998,7 +998,7 @@ radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecut
break;
case MESA_SHADER_FRAGMENT:
s->value.u64 += shader->info.ps.num_interp + shader->info.ps.num_prim_interp;
s->value.u64 += shader->info.ps.num_inputs;
break;
default:

View file

@ -1728,7 +1728,7 @@ radv_precompute_registers_hw_fs(struct radv_device *device, struct radv_shader_b
if (pdev->info.gfx_level >= GFX12) {
info->regs.ps.spi_ps_in_control = S_028640_PS_W32_EN(info->wave_size == 32);
info->regs.ps.spi_gs_out_config_ps = S_00B0C4_NUM_INTERP(info->ps.num_interp);
info->regs.ps.spi_gs_out_config_ps = S_00B0C4_NUM_INTERP(info->ps.num_inputs);
info->regs.ps.pa_sc_hisz_control = S_028BBC_ROUND(2); /* required minimum value */
if (info->ps.depth_layout == FRAG_DEPTH_LAYOUT_GREATER)
@ -1737,11 +1737,16 @@ radv_precompute_registers_hw_fs(struct radv_device *device, struct radv_shader_b
info->regs.ps.pa_sc_hisz_control |= S_028BBC_CONSERVATIVE_Z_EXPORT(V_028BBC_EXPORT_LESS_THAN_Z);
} else {
/* GFX11 workaround when there are no PS inputs but LDS is used. */
const bool param_gen = pdev->info.gfx_level == GFX11 && !info->ps.num_interp && binary->config.lds_size;
const bool param_gen = pdev->info.gfx_level == GFX11 && !info->ps.num_inputs && binary->config.lds_size;
info->regs.ps.spi_ps_in_control = S_0286D8_NUM_INTERP(info->ps.num_interp) |
S_0286D8_NUM_PRIM_INTERP(info->ps.num_prim_interp) |
S_0286D8_PS_W32_EN(info->wave_size == 32) | S_0286D8_PARAM_GEN(param_gen);
info->regs.ps.spi_ps_in_control = S_0286D8_PS_W32_EN(info->wave_size == 32) | S_0286D8_PARAM_GEN(param_gen);
if (pdev->info.gfx_level != GFX10_3) {
info->regs.ps.spi_ps_in_control |= S_0286D8_NUM_INTERP(info->ps.num_inputs);
} else {
info->regs.ps.spi_ps_in_control |= S_0286D8_NUM_INTERP(info->ps.num_inputs - info->ps.num_prim_interp) |
S_0286D8_NUM_PRIM_INTERP(info->ps.num_prim_interp);
}
if (pdev->info.gfx_level >= GFX9 && pdev->info.gfx_level < GFX11)
info->regs.ps.pa_sc_shader_control = S_028C40_LOAD_COLLISION_WAVEID(info->ps.pops);
@ -2598,7 +2603,7 @@ radv_get_max_waves(const struct radv_device *device, const struct ac_shader_conf
unsigned lds_per_wave = 0;
if (stage == MESA_SHADER_FRAGMENT) {
lds_per_wave = conf->lds_size * gpu_info->lds_encode_granularity + info->ps.num_interp * 48;
lds_per_wave = conf->lds_size * gpu_info->lds_encode_granularity + info->ps.num_inputs * 48;
lds_per_wave = align(lds_per_wave, gpu_info->lds_alloc_granularity);
} else if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_TASK) {
unsigned max_workgroup_size = info->workgroup_size;

View file

@ -917,20 +917,17 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir,
const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint64_t per_primitive_input_mask = nir->info.inputs_read & nir->info.per_primitive_inputs;
const unsigned num_per_primitive_inputs = util_bitcount64(per_primitive_input_mask);
const unsigned num_inputs = util_bitcount64(nir->info.inputs_read);
assert(num_per_primitive_inputs <= num_inputs);
info->ps.num_interp = num_inputs;
info->ps.num_inputs = util_bitcount64(nir->info.inputs_read);
info->ps.num_prim_interp = 0;
if (pdev->info.gfx_level == GFX10_3) {
/* GFX10.3 distinguishes NUM_INTERP and NUM_PRIM_INTERP, but
* these are counted together in NUM_INTERP on GFX11.
*/
info->ps.num_interp = num_inputs - num_per_primitive_inputs;
info->ps.num_prim_interp = num_per_primitive_inputs;
const uint64_t per_primitive_input_mask = nir->info.inputs_read & nir->info.per_primitive_inputs;
info->ps.num_prim_interp = util_bitcount64(per_primitive_input_mask);
assert(info->ps.num_prim_interp <= info->ps.num_inputs);
}
info->ps.can_discard = nir->info.fs.uses_discard;

View file

@ -182,7 +182,7 @@ struct radv_shader_info {
uint32_t explicit_strict_shaded_mask;
uint32_t float16_shaded_mask;
uint32_t float16_hi_shaded_mask;
uint32_t num_interp;
uint32_t num_inputs;
uint32_t num_prim_interp;
bool can_discard;
bool early_fragment_test;

View file

@ -91,7 +91,7 @@ si_fill_aco_shader_info(struct si_shader *shader, struct aco_shader_info *info,
info->tcs.tcs_offchip_layout = args->tcs_offchip_layout;
break;
case MESA_SHADER_FRAGMENT:
info->ps.num_interp = si_get_ps_num_interp(shader);
info->ps.num_inputs = si_get_ps_num_interp(shader);
info->ps.spi_ps_input_ena = shader->config.spi_ps_input_ena;
info->ps.spi_ps_input_addr = shader->config.spi_ps_input_addr;
info->ps.alpha_reference = args->alpha_reference;