radv: recompute FS input bases with a RADV-specific pass

Summary of what the hunks below do:

* Add radv_recompute_fs_input_bases(), which rewrites the base of
  load_input, load_per_primitive_input, load_interpolated_input and
  load_input_vertex intrinsics. Bases are assigned in three consecutive
  groups, all derived from nir->info.inputs_read:
    1. inputs not in per_primitive_inputs, excluding
       PRIMITIVE_ID / LAYER / VIEWPORT;
    2. PRIMITIVE_ID / LAYER / VIEWPORT;
    3. inputs in per_primitive_inputs, excluding
       PRIMITIVE_ID / LAYER / VIEWPORT.
  Inside a group, the base is the number of lower-numbered locations that
  belong to the same group. The pass returns true when any base changed.

* Use the new pass instead of nir_recompute_io_bases() for fragment shaders
  in radv_nir_lower_io() and radv_graphics_shaders_link_varyings_second().
  In the latter, the call moves below nir_shader_gather_info(); presumably
  because the new pass reads nir->info.inputs_read and
  nir->info.per_primitive_inputs (to be confirmed by the author).

* Reorder radv_emit_ps_inputs() so that the PRIMITIVE_ID / LAYER / VIEWPORT
  entries are written after the input_mask entries and before the
  input_per_primitive_mask entries.

NOTE(review): the line structure of this patch was restored from a
whitespace-collapsed copy. Hunk line counts were re-checked against the
hunk headers, but the exact indentation and blank context lines are
inferred; verify against the target tree before applying.

diff --git a/src/amd/vulkan/nir/radv_nir.h b/src/amd/vulkan/nir/radv_nir.h
index cd779d64e85..0aa56c84380 100644
--- a/src/amd/vulkan/nir/radv_nir.h
+++ b/src/amd/vulkan/nir/radv_nir.h
@@ -63,6 +63,8 @@ void radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask);
 
 unsigned radv_map_io_driver_location(unsigned semantic);
 
+bool radv_recompute_fs_input_bases(nir_shader *nir);
+
 void radv_nir_lower_io(struct radv_device *device, nir_shader *nir);
 
 bool radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *stage);
diff --git a/src/amd/vulkan/nir/radv_nir_lower_io.c b/src/amd/vulkan/nir/radv_nir_lower_io.c
index 57b98a0fe1f..00f138c3d70 100644
--- a/src/amd/vulkan/nir/radv_nir_lower_io.c
+++ b/src/amd/vulkan/nir/radv_nir_lower_io.c
@@ -53,6 +53,78 @@ radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask)
    }
 }
 
+typedef struct {
+   uint64_t always_per_vertex;         /* read inputs assigned to the first group of bases */
+   uint64_t potentially_per_primitive; /* read PRIMITIVE_ID / LAYER / VIEWPORT bits (second group) */
+   uint64_t always_per_primitive;      /* read per-primitive inputs, assigned last */
+   unsigned num_always_per_vertex;         /* popcount of always_per_vertex */
+   unsigned num_potentially_per_primitive; /* popcount of potentially_per_primitive */
+} radv_recompute_fs_input_bases_state;
+
+static bool
+radv_recompute_fs_input_bases_callback(UNUSED nir_builder *b, nir_intrinsic_instr *intrin, void *data)
+{
+   const radv_recompute_fs_input_bases_state *s = (const radv_recompute_fs_input_bases_state *)data;
+
+   /* Filter possible FS input intrinsics */
+   switch (intrin->intrinsic) {
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_per_primitive_input:
+   case nir_intrinsic_load_interpolated_input:
+   case nir_intrinsic_load_input_vertex:
+      break;
+   default:
+      return false;
+   }
+
+   const nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
+   const uint64_t location_bit = BITFIELD64_BIT(sem.location);
+   const uint64_t location_mask = BITFIELD64_MASK(sem.location);
+   const unsigned old_base = nir_intrinsic_base(intrin);
+   unsigned new_base = 0;
+
+   if (location_bit & s->always_per_vertex) {
+      new_base = util_bitcount64(s->always_per_vertex & location_mask);
+   } else if (location_bit & s->potentially_per_primitive) {
+      new_base = s->num_always_per_vertex + util_bitcount64(s->potentially_per_primitive & location_mask);
+   } else if (location_bit & s->always_per_primitive) {
+      new_base = s->num_always_per_vertex + s->num_potentially_per_primitive +
+                 util_bitcount64(s->always_per_primitive & location_mask);
+   } else {
+      unreachable("invalid FS input");
+   }
+
+   if (new_base != old_base) {
+      nir_intrinsic_set_base(intrin, new_base);
+      return true;
+   }
+
+   return false;
+}
+
+bool
+radv_recompute_fs_input_bases(nir_shader *nir)
+{
+   const uint64_t always_per_vertex = nir->info.inputs_read & ~nir->info.per_primitive_inputs &
+                                      ~(VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+
+   const uint64_t potentially_per_primitive =
+      nir->info.inputs_read & (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+
+   const uint64_t always_per_primitive = nir->info.inputs_read & nir->info.per_primitive_inputs &
+                                         ~(VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+
+   radv_recompute_fs_input_bases_state s = {
+      .always_per_vertex = always_per_vertex,
+      .potentially_per_primitive = potentially_per_primitive,
+      .always_per_primitive = always_per_primitive,
+      .num_always_per_vertex = util_bitcount64(always_per_vertex),
+      .num_potentially_per_primitive = util_bitcount64(potentially_per_primitive),
+   };
+
+   return nir_shader_intrinsics_pass(nir, radv_recompute_fs_input_bases_callback, nir_metadata_control_flow, &s);
+}
+
 void
 radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
 {
@@ -83,10 +155,10 @@ radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
    }
 
    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
-      /* Recompute FS input intrinsic bases to make sure that there are no gaps
-       * between the FS input slots.
+      /* Recompute FS input intrinsic bases to assign a location to each FS input.
+       * The computed base will match the index of each input in SPI_PS_INPUT_CNTL_n.
        */
-      nir_recompute_io_bases(nir, nir_var_shader_in);
+      radv_recompute_fs_input_bases(nir);
    }
 
    NIR_PASS_V(nir, nir_opt_dce);
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 14d2aeaed41..478f438e895 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2573,17 +2573,6 @@ radv_emit_ps_inputs(struct radv_cmd_buffer *cmd_buffer)
    uint32_t ps_input_cntl[32];
    unsigned ps_offset = 0;
 
-   if (!mesh) {
-      if (ps->info.ps.prim_id_input)
-         slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, false, radv_ps_in_flat);
-
-      if (ps->info.ps.layer_input)
-         slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset, true, radv_ps_in_flat);
-
-      if (ps->info.ps.viewport_index_input)
-         slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, true, radv_ps_in_flat);
-   }
-
    if (ps->info.ps.has_pcoord)
       ps_input_cntl[ps_offset++] = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
 
@@ -2595,8 +2584,17 @@ radv_emit_ps_inputs(struct radv_cmd_buffer *cmd_buffer)
 
    input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_mask, ps_input_cntl, &ps_offset, radv_ps_in_flat);
 
-   /* Per-primitive PS inputs: the HW needs these to be last. */
-   if (mesh) {
+   /* Potentially per-primitive PS inputs */
+   if (!mesh) {
+      if (ps->info.ps.prim_id_input)
+         slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, false, radv_ps_in_flat);
+
+      if (ps->info.ps.layer_input)
+         slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset, true, radv_ps_in_flat);
+
+      if (ps->info.ps.viewport_index_input)
+         slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, true, radv_ps_in_flat);
+   } else {
       if (ps->info.ps.prim_id_input)
          slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, false, per_prim);
 
@@ -2607,6 +2605,7 @@ radv_emit_ps_inputs(struct radv_cmd_buffer *cmd_buffer)
          slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, true, per_prim);
    }
 
+   /* Per-primitive PS inputs: the HW needs these to be last. */
    input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_per_primitive_mask, ps_input_cntl, &ps_offset, per_prim);
 
    if (pdev->info.gfx_level >= GFX12) {
diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c
index 3e5668dfaaf..3f8f42c8579 100644
--- a/src/amd/vulkan/radv_pipeline_graphics.c
+++ b/src/amd/vulkan/radv_pipeline_graphics.c
@@ -1535,18 +1535,16 @@ radv_graphics_shaders_link_varyings_second(struct radv_shader_stage *producer_st
       NIR_PASS(_, consumer, nir_opt_vectorize_io, nir_var_shader_in);
    }
 
-   /* Recompute driver locations of PS inputs
-    * because the backend compiler relies on their driver locations.
-    */
-   if (consumer->info.stage == MESA_SHADER_FRAGMENT)
-      nir_recompute_io_bases(consumer, nir_var_shader_in);
-
    /* Gather shader info; at least the I/O info likely changed
     * and changes to only the I/O info are not reflected in nir_opt_varyings_progress.
     */
    nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
    nir_shader_gather_info(consumer, nir_shader_get_entrypoint(consumer));
 
+   /* Recompute PS input bases to remove gaps; relies on the info gathered above. */
+   if (consumer->info.stage == MESA_SHADER_FRAGMENT)
+      radv_recompute_fs_input_bases(consumer);
+
    /* Recreate XFB info from intrinsics (nir_opt_varyings may have changed it). */
    if (producer->xfb_info) {
       nir_gather_xfb_info_from_intrinsics(producer);