mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 13:58:04 +02:00
radv, aco: Consolidate num_interp + num_prim_interp into num_inputs.
num_inputs contains the total number of FS inputs. Note that this also fixes a bug where some calculations in RADV and ACO were missing the per-primitive attributes from the LDS usage of PS. Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32220>
This commit is contained in:
parent
e5a9ae912b
commit
e2b8c4a9ac
8 changed files with 21 additions and 19 deletions
|
|
@ -473,7 +473,7 @@ max_suitable_waves(Program* program, uint16_t waves)
|
|||
* These limit occupancy the same way as other stages' LDS usage does.
|
||||
*/
|
||||
unsigned lds_bytes_per_interp = 3 * 16;
|
||||
unsigned lds_param_bytes = lds_bytes_per_interp * program->info.ps.num_interp;
|
||||
unsigned lds_param_bytes = lds_bytes_per_interp * program->info.ps.num_inputs;
|
||||
lds_per_workgroup += align(lds_param_bytes, program->dev.lds_alloc_granule);
|
||||
}
|
||||
unsigned lds_limit = program->wgp_mode ? program->dev.lds_limit * 2 : program->dev.lds_limit;
|
||||
|
|
|
|||
|
|
@ -117,7 +117,7 @@ struct aco_shader_info {
|
|||
uint32_t num_lds_blocks;
|
||||
} tcs;
|
||||
struct {
|
||||
uint32_t num_interp;
|
||||
uint32_t num_inputs;
|
||||
unsigned spi_ps_input_ena;
|
||||
unsigned spi_ps_input_addr;
|
||||
bool has_prolog;
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv
|
|||
ASSIGN_FIELD(vs.tcs_temp_only_input_mask);
|
||||
ASSIGN_FIELD(vs.has_prolog);
|
||||
ASSIGN_FIELD(tcs.num_lds_blocks);
|
||||
ASSIGN_FIELD(ps.num_interp);
|
||||
ASSIGN_FIELD(ps.num_inputs);
|
||||
ASSIGN_FIELD(cs.uses_full_subgroups);
|
||||
aco_info->ps.spi_ps_input_ena = radv->ps.spi_ps_input_ena;
|
||||
aco_info->ps.spi_ps_input_addr = radv->ps.spi_ps_input_addr;
|
||||
|
|
|
|||
|
|
@ -998,7 +998,7 @@ radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecut
|
|||
break;
|
||||
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
s->value.u64 += shader->info.ps.num_interp + shader->info.ps.num_prim_interp;
|
||||
s->value.u64 += shader->info.ps.num_inputs;
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -1728,7 +1728,7 @@ radv_precompute_registers_hw_fs(struct radv_device *device, struct radv_shader_b
|
|||
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
info->regs.ps.spi_ps_in_control = S_028640_PS_W32_EN(info->wave_size == 32);
|
||||
info->regs.ps.spi_gs_out_config_ps = S_00B0C4_NUM_INTERP(info->ps.num_interp);
|
||||
info->regs.ps.spi_gs_out_config_ps = S_00B0C4_NUM_INTERP(info->ps.num_inputs);
|
||||
|
||||
info->regs.ps.pa_sc_hisz_control = S_028BBC_ROUND(2); /* required minimum value */
|
||||
if (info->ps.depth_layout == FRAG_DEPTH_LAYOUT_GREATER)
|
||||
|
|
@ -1737,11 +1737,16 @@ radv_precompute_registers_hw_fs(struct radv_device *device, struct radv_shader_b
|
|||
info->regs.ps.pa_sc_hisz_control |= S_028BBC_CONSERVATIVE_Z_EXPORT(V_028BBC_EXPORT_LESS_THAN_Z);
|
||||
} else {
|
||||
/* GFX11 workaround when there are no PS inputs but LDS is used. */
|
||||
const bool param_gen = pdev->info.gfx_level == GFX11 && !info->ps.num_interp && binary->config.lds_size;
|
||||
const bool param_gen = pdev->info.gfx_level == GFX11 && !info->ps.num_inputs && binary->config.lds_size;
|
||||
|
||||
info->regs.ps.spi_ps_in_control = S_0286D8_NUM_INTERP(info->ps.num_interp) |
|
||||
S_0286D8_NUM_PRIM_INTERP(info->ps.num_prim_interp) |
|
||||
S_0286D8_PS_W32_EN(info->wave_size == 32) | S_0286D8_PARAM_GEN(param_gen);
|
||||
info->regs.ps.spi_ps_in_control = S_0286D8_PS_W32_EN(info->wave_size == 32) | S_0286D8_PARAM_GEN(param_gen);
|
||||
|
||||
if (pdev->info.gfx_level != GFX10_3) {
|
||||
info->regs.ps.spi_ps_in_control |= S_0286D8_NUM_INTERP(info->ps.num_inputs);
|
||||
} else {
|
||||
info->regs.ps.spi_ps_in_control |= S_0286D8_NUM_INTERP(info->ps.num_inputs - info->ps.num_prim_interp) |
|
||||
S_0286D8_NUM_PRIM_INTERP(info->ps.num_prim_interp);
|
||||
}
|
||||
|
||||
if (pdev->info.gfx_level >= GFX9 && pdev->info.gfx_level < GFX11)
|
||||
info->regs.ps.pa_sc_shader_control = S_028C40_LOAD_COLLISION_WAVEID(info->ps.pops);
|
||||
|
|
@ -2598,7 +2603,7 @@ radv_get_max_waves(const struct radv_device *device, const struct ac_shader_conf
|
|||
unsigned lds_per_wave = 0;
|
||||
|
||||
if (stage == MESA_SHADER_FRAGMENT) {
|
||||
lds_per_wave = conf->lds_size * gpu_info->lds_encode_granularity + info->ps.num_interp * 48;
|
||||
lds_per_wave = conf->lds_size * gpu_info->lds_encode_granularity + info->ps.num_inputs * 48;
|
||||
lds_per_wave = align(lds_per_wave, gpu_info->lds_alloc_granularity);
|
||||
} else if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_TASK) {
|
||||
unsigned max_workgroup_size = info->workgroup_size;
|
||||
|
|
|
|||
|
|
@ -917,20 +917,17 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir,
|
|||
const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const uint64_t per_primitive_input_mask = nir->info.inputs_read & nir->info.per_primitive_inputs;
|
||||
const unsigned num_per_primitive_inputs = util_bitcount64(per_primitive_input_mask);
|
||||
const unsigned num_inputs = util_bitcount64(nir->info.inputs_read);
|
||||
assert(num_per_primitive_inputs <= num_inputs);
|
||||
|
||||
info->ps.num_interp = num_inputs;
|
||||
info->ps.num_inputs = util_bitcount64(nir->info.inputs_read);
|
||||
info->ps.num_prim_interp = 0;
|
||||
|
||||
if (pdev->info.gfx_level == GFX10_3) {
|
||||
/* GFX10.3 distinguishes NUM_INTERP and NUM_PRIM_INTERP, but
|
||||
* these are counted together in NUM_INTERP on GFX11.
|
||||
*/
|
||||
info->ps.num_interp = num_inputs - num_per_primitive_inputs;
|
||||
info->ps.num_prim_interp = num_per_primitive_inputs;
|
||||
const uint64_t per_primitive_input_mask = nir->info.inputs_read & nir->info.per_primitive_inputs;
|
||||
info->ps.num_prim_interp = util_bitcount64(per_primitive_input_mask);
|
||||
assert(info->ps.num_prim_interp <= info->ps.num_inputs);
|
||||
}
|
||||
|
||||
info->ps.can_discard = nir->info.fs.uses_discard;
|
||||
|
|
|
|||
|
|
@ -182,7 +182,7 @@ struct radv_shader_info {
|
|||
uint32_t explicit_strict_shaded_mask;
|
||||
uint32_t float16_shaded_mask;
|
||||
uint32_t float16_hi_shaded_mask;
|
||||
uint32_t num_interp;
|
||||
uint32_t num_inputs;
|
||||
uint32_t num_prim_interp;
|
||||
bool can_discard;
|
||||
bool early_fragment_test;
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ si_fill_aco_shader_info(struct si_shader *shader, struct aco_shader_info *info,
|
|||
info->tcs.tcs_offchip_layout = args->tcs_offchip_layout;
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
info->ps.num_interp = si_get_ps_num_interp(shader);
|
||||
info->ps.num_inputs = si_get_ps_num_interp(shader);
|
||||
info->ps.spi_ps_input_ena = shader->config.spi_ps_input_ena;
|
||||
info->ps.spi_ps_input_addr = shader->config.spi_ps_input_addr;
|
||||
info->ps.alpha_reference = args->alpha_reference;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue