ac/nir: add ac_nir_assign_fs_input_locations to set PS input locations in stone
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

No intended functional change.

This prevents possible breakage due to DCE removing input loads followed
by nir_shader_gather_info updating input masks and changing the result of
ac_nir_get_io_driver_location after PS input register contents are already
determined.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41175>
This commit is contained in:
Marek Olšák 2026-04-24 18:58:13 -04:00 committed by Marge Bot
parent 4b0a0ed7b6
commit 0684976de8
9 changed files with 179 additions and 138 deletions

View file

@ -151,6 +151,7 @@ amd_common_files = files(
'ac_video_dec.c',
'nir/ac_nir.c',
'nir/ac_nir.h',
'nir/ac_nir_assign_io_locations.c',
'nir/ac_nir_helpers.h',
'nir/ac_nir_opt_outputs.c',
'nir/ac_nir_cull.c',

View file

@ -1082,131 +1082,3 @@ ac_nir_opt_vectorize_cb(const nir_instr *instr, const void *data)
return target_width;
}
/* The IO driver location is computed from shader_info masks using a prefix bitcount.
* Used by FS inputs, and radeonsi+LLVM also uses this for LS outputs to VGPRs and FS outputs.
*
* driver_location == nir_intrinsic_base == nir_variable::data::driver_location.
*/
unsigned
ac_nir_get_io_driver_location(const nir_shader *nir, unsigned location, bool is_input)
{
assert((nir->info.stage == MESA_SHADER_VERTEX && !is_input) ||
nir->info.stage == MESA_SHADER_FRAGMENT);
/* All "read" bits should also be set in "written" bits. */
assert(!(nir->info.outputs_read & ~nir->info.outputs_written));
assert(!(nir->info.outputs_read_16bit & nir->info.outputs_written_16bit));
/* Per-vertex masks. */
uint64_t mask = is_input ? nir->info.inputs_read : nir->info.outputs_written;
uint16_t mask16 = is_input ? nir->info.inputs_read_16bit : nir->info.outputs_written_16bit;
uint64_t back_color_mask = 0;
/* Handle FS outputs first. */
if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_input) {
assert(mask & BITFIELD64_BIT(location));
return util_bitcount64(mask & BITFIELD64_MASK(location));
}
/* Per-primitive masks. */
uint64_t mask_maybe_per_prim = 0;
uint64_t mask_per_prim = 0;
uint16_t mask16_per_prim = 0;
bool maybe_per_primitive = false;
bool per_primitive = false;
/* Fragment shader input locations must be in this order: per-vertex, maybe per-prim, per-prim. */
if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input) {
if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_COLOR0_AMD))
mask |= VARYING_BIT_COL0;
if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_COLOR1_AMD))
mask |= VARYING_BIT_COL1;
/* TODO: back colors are broken with mesh shaders because they are always after per-primitive
* inputs.
*/
back_color_mask = mask & (VARYING_BIT_BFC0 | VARYING_BIT_BFC1);
assert(!(mask & VARYING_BIT_LAYER)); /* This should have been lowered. */
mask_maybe_per_prim = mask & (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);
mask_per_prim = mask & nir->info.per_primitive_inputs &
~(VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);
/* TODO: Add shader_info::per_primitive_inputs_16bit for separate GLES mesh shaders + mediump. */
/*mask16_per_prim = mask & nir->info.per_primitive_inputs_16bit;*/
/* Make the masks disjoint. */
mask &= ~(back_color_mask | mask_maybe_per_prim | mask_per_prim);
mask16 &= ~mask16_per_prim;
if (location == VARYING_SLOT_PRIMITIVE_ID || location == VARYING_SLOT_VIEWPORT) {
maybe_per_primitive = true;
} else if (location >= VARYING_SLOT_VAR0_16BIT) {
/* TODO: Add shader_info::per_primitive_inputs_16bit. */
/*per_primitive = nir->info.per_primitive_inputs_16bit &
BITFIELD_BIT(location - VARYING_SLOT_VAR0_16BIT);*/
} else {
assert(location <= VARYING_SLOT_VAR31);
per_primitive = nir->info.per_primitive_inputs & BITFIELD64_BIT(location);
}
}
enum {
MASK,
MASK16,
MASK_MAYBE_PRIM_PRIM, /* always after per-vertex varyings (NUM_INTERP) */
MASK_PER_PRIM,
MASK16_PER_PRIM,
BACK_COLOR_MASK, /* always after all other varyings */
PARAM_GEN_MASK, /* PARAM_GEN is loaded at location (NUM_INTERP + NUM_PRIM_INTERP). */
};
/* We'll compute a prefix bitcount from this bitset. */
const uint64_t masks[] = {
[MASK] = mask,
[MASK16] = mask16,
[MASK_MAYBE_PRIM_PRIM] = mask_maybe_per_prim,
[MASK_PER_PRIM] = mask_per_prim,
[MASK16_PER_PRIM] = mask16_per_prim,
[BACK_COLOR_MASK] = back_color_mask,
[PARAM_GEN_MASK] = 0,
};
unsigned location_mask_index;
/* Assign a mask index to the location, and make "location" relative to the beginning of its mask. */
if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input && location == VARYING_SLOT_PARAM_GEN_AMD)
location_mask_index = PARAM_GEN_MASK;
else if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input &&
(location == VARYING_SLOT_BFC0 || location == VARYING_SLOT_BFC1))
location_mask_index = BACK_COLOR_MASK;
else if (per_primitive)
location_mask_index = location >= VARYING_SLOT_VAR0_16BIT ? MASK16_PER_PRIM : MASK_PER_PRIM;
else if (maybe_per_primitive)
location_mask_index = MASK_MAYBE_PRIM_PRIM;
else
location_mask_index = location >= VARYING_SLOT_VAR0_16BIT ? MASK16 : MASK;
/* Make "location" relative to its mask. */
if (location >= VARYING_SLOT_VAR0_16BIT)
location -= VARYING_SLOT_VAR0_16BIT;
/* Compute the prefix bitcount. */
unsigned index = 0;
for (unsigned i = 0; i < location_mask_index; i++)
index += util_bitcount64(masks[i]);
index += util_bitcount64(masks[location_mask_index] & BITFIELD64_MASK(location));
#if 0 /* useful debug code */
printf("location_mask_index=%u\n", location_mask_index);
for (unsigned i = 0; i <= location_mask_index; i++)
printf("mask[%u] = 0x%lx\n", i, masks[i]);
printf("index=%u, location=%u, %s, num=%u\n",
index, location, is_input ? "input" : "output",
is_input ? nir->num_inputs : nir->num_outputs);
#endif
return index;
}

View file

@ -475,6 +475,9 @@ ac_nir_opt_vectorize_cb(const nir_instr *instr, const void *data);
unsigned
ac_nir_get_io_driver_location(const nir_shader *nir, unsigned location, bool is_input);
bool
ac_nir_assign_fs_input_locations(nir_shader *nir);
#ifdef __cplusplus
}
#endif

View file

@ -0,0 +1,164 @@
/* Copyright © 2026 Valve Corporation
* SPDX-License-Identifier: MIT
*/
#include "ac_nir.h"
#include "nir_builder.h"
/* The IO driver location is computed from shader_info masks using a prefix bitcount.
* Used by FS inputs, and radeonsi+LLVM also uses this for LS outputs to VGPRs and FS outputs.
*
* driver_location == nir_intrinsic_base == nir_variable::data::driver_location.
*/
unsigned
ac_nir_get_io_driver_location(const nir_shader *nir, unsigned location, bool is_input)
{
assert((nir->info.stage == MESA_SHADER_VERTEX && !is_input) ||
nir->info.stage == MESA_SHADER_FRAGMENT);
/* All "read" bits should also be set in "written" bits. */
assert(!(nir->info.outputs_read & ~nir->info.outputs_written));
assert(!(nir->info.outputs_read_16bit & nir->info.outputs_written_16bit));
/* Per-vertex masks. */
uint64_t mask = is_input ? nir->info.inputs_read : nir->info.outputs_written;
uint16_t mask16 = is_input ? nir->info.inputs_read_16bit : nir->info.outputs_written_16bit;
uint64_t back_color_mask = 0;
/* Handle FS outputs first. */
if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_input) {
assert(mask & BITFIELD64_BIT(location));
return util_bitcount64(mask & BITFIELD64_MASK(location));
}
/* Per-primitive masks. */
uint64_t mask_maybe_per_prim = 0;
uint64_t mask_per_prim = 0;
uint16_t mask16_per_prim = 0;
bool maybe_per_primitive = false;
bool per_primitive = false;
/* Fragment shader input locations must be in this order: per-vertex, maybe per-prim, per-prim. */
if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input) {
if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_COLOR0_AMD))
mask |= VARYING_BIT_COL0;
if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_COLOR1_AMD))
mask |= VARYING_BIT_COL1;
/* TODO: back colors are broken with mesh shaders because they are always after per-primitive
* inputs.
*/
back_color_mask = mask & (VARYING_BIT_BFC0 | VARYING_BIT_BFC1);
assert(!(mask & VARYING_BIT_LAYER)); /* This should have been lowered. */
mask_maybe_per_prim = mask & (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);
mask_per_prim = mask & nir->info.per_primitive_inputs &
~(VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);
/* TODO: Add shader_info::per_primitive_inputs_16bit for separate GLES mesh shaders + mediump. */
/*mask16_per_prim = mask & nir->info.per_primitive_inputs_16bit;*/
/* Make the masks disjoint. */
mask &= ~(back_color_mask | mask_maybe_per_prim | mask_per_prim);
mask16 &= ~mask16_per_prim;
if (location == VARYING_SLOT_PRIMITIVE_ID || location == VARYING_SLOT_VIEWPORT) {
maybe_per_primitive = true;
} else if (location >= VARYING_SLOT_VAR0_16BIT) {
/* TODO: Add shader_info::per_primitive_inputs_16bit. */
/*per_primitive = nir->info.per_primitive_inputs_16bit &
BITFIELD_BIT(location - VARYING_SLOT_VAR0_16BIT);*/
} else {
assert(location <= VARYING_SLOT_VAR31);
per_primitive = nir->info.per_primitive_inputs & BITFIELD64_BIT(location);
}
}
enum {
MASK,
MASK16,
MASK_MAYBE_PRIM_PRIM, /* always after per-vertex varyings (NUM_INTERP) */
MASK_PER_PRIM,
MASK16_PER_PRIM,
BACK_COLOR_MASK, /* always after all other varyings */
PARAM_GEN_MASK, /* PARAM_GEN is loaded at location (NUM_INTERP + NUM_PRIM_INTERP). */
};
/* We'll compute a prefix bitcount from this bitset. */
const uint64_t masks[] = {
[MASK] = mask,
[MASK16] = mask16,
[MASK_MAYBE_PRIM_PRIM] = mask_maybe_per_prim,
[MASK_PER_PRIM] = mask_per_prim,
[MASK16_PER_PRIM] = mask16_per_prim,
[BACK_COLOR_MASK] = back_color_mask,
[PARAM_GEN_MASK] = 0,
};
unsigned location_mask_index;
/* Assign a mask index to the location, and make "location" relative to the beginning of its mask. */
if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input && location == VARYING_SLOT_PARAM_GEN_AMD)
location_mask_index = PARAM_GEN_MASK;
else if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input &&
(location == VARYING_SLOT_BFC0 || location == VARYING_SLOT_BFC1))
location_mask_index = BACK_COLOR_MASK;
else if (per_primitive)
location_mask_index = location >= VARYING_SLOT_VAR0_16BIT ? MASK16_PER_PRIM : MASK_PER_PRIM;
else if (maybe_per_primitive)
location_mask_index = MASK_MAYBE_PRIM_PRIM;
else
location_mask_index = location >= VARYING_SLOT_VAR0_16BIT ? MASK16 : MASK;
/* Make "location" relative to its mask. */
if (location >= VARYING_SLOT_VAR0_16BIT)
location -= VARYING_SLOT_VAR0_16BIT;
/* Compute the prefix bitcount. */
unsigned index = 0;
for (unsigned i = 0; i < location_mask_index; i++)
index += util_bitcount64(masks[i]);
index += util_bitcount64(masks[location_mask_index] & BITFIELD64_MASK(location));
#if 0 /* useful debug code */
printf("location_mask_index=%u\n", location_mask_index);
for (unsigned i = 0; i <= location_mask_index; i++)
printf("mask[%u] = 0x%lx\n", i, masks[i]);
printf("index=%u, location=%u, %s, num=%u\n",
index, location, is_input ? "input" : "output",
is_input ? nir->num_inputs : nir->num_outputs);
#endif
return index;
}
/* Intrinsic callback: for an input load, overwrite the intrinsic base with the driver
 * location computed from its IO semantics location. Returns true when the intrinsic was
 * an input load (and its base was set), false for every other intrinsic.
 */
static bool
assign_fs_input_location(nir_builder *b, nir_intrinsic_instr *intr, void *_unused)
{
   /* Anything that isn't an input load is left untouched. */
   if (!nir_is_input_load(intr))
      return false;

   const unsigned slot = nir_intrinsic_io_semantics(intr).location;
   const unsigned driver_loc = ac_nir_get_io_driver_location(b->shader, slot, true);

   nir_intrinsic_set_base(intr, driver_loc);
   return true;
}
/* Set "bases" of FS input loads to their final SPI_PS_INPUT_CNTL location.
 *
 * ACO and ac_nir_to_llvm use the base as the PS input location in ds_param_load and
 * v_interp instructions, and drivers use it to gather PS input info.
 *
 * Bases of all other IO intrinsics are left alone.
 *
 * The shader must be a fragment shader (asserted). Returns the result of the intrinsics pass.
 */
bool
ac_nir_assign_fs_input_locations(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_FRAGMENT);

   const bool progress =
      nir_shader_intrinsics_pass(nir, assign_fs_input_location, nir_metadata_all, NULL);

   return progress;
}

View file

@ -1334,8 +1334,7 @@ visit_load_interpolated_input(isel_context* ctx, nir_intrinsic_instr* instr)
{
Temp dst = get_ssa_temp(ctx, &instr->def);
Temp coords = get_ssa_temp(ctx, instr->src[0].ssa);
unsigned idx =
ac_nir_get_io_driver_location(ctx->shader, nir_intrinsic_io_semantics(instr).location, true);
unsigned idx = nir_intrinsic_base(instr);
unsigned component = nir_intrinsic_component(instr);
bool high_16bits = nir_intrinsic_io_semantics(instr).high_16bits;
Temp prim_mask = get_arg(ctx, ctx->args->prim_mask);
@ -1472,7 +1471,7 @@ visit_load_fs_input(isel_context* ctx, nir_intrinsic_instr* instr)
Temp prim_mask = get_arg(ctx, ctx->args->prim_mask);
nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
unsigned idx = ac_nir_get_io_driver_location(ctx->shader, sem.location, true);
unsigned idx = nir_intrinsic_base(instr);
unsigned component = nir_intrinsic_component(instr);
bool high_16bits = sem.high_16bits;
unsigned vertex_id = 0; /* P0 */

View file

@ -2633,8 +2633,7 @@ static LLVMValueRef visit_load_input(struct ac_nir_context *ctx, nir_intrinsic_i
if (instr->intrinsic == nir_intrinsic_load_input_vertex)
vertex_id = nir_src_as_uint(instr->src[0]);
nir_shader *nir = nir_cf_node_get_function(&instr->instr.block->cf_node)->function->shader;
unsigned base = ac_nir_get_io_driver_location(nir, sem.location, true);
unsigned base = nir_intrinsic_base(instr);
LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, base, false);
LLVMTypeRef dest_type = get_def_type(ctx, &instr->def);
LLVMValueRef values[8];
@ -2823,9 +2822,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
assert(offset[0].i32 == 0);
LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
nir_shader *nir = nir_cf_node_get_function(&instr->instr.block->cf_node)->function->shader;
unsigned index =
ac_nir_get_io_driver_location(nir, nir_intrinsic_io_semantics(instr).location, true);
unsigned index = nir_intrinsic_base(instr);
unsigned component = nir_intrinsic_component(instr);
result = load_interpolated_input(ctx, interp_param, index, component,
instr->def.num_components, instr->def.bit_size,

View file

@ -1982,6 +1982,9 @@ radv_fill_shader_info(const struct radv_compiler_info *compiler_info, const enum
consider_force_vrs = radv_consider_force_vrs(gfx_state, &stages[i], &stages[MESA_SHADER_FRAGMENT]);
}
if (i == MESA_SHADER_FRAGMENT)
NIR_PASS(_, stages[i].nir, ac_nir_assign_fs_input_locations);
radv_nir_shader_info_pass(compiler_info, stages[i].nir, &stages[i].layout, &stages[i].key, gfx_state,
pipeline_type, consider_force_vrs, &stages[i].info);
}

View file

@ -69,7 +69,7 @@ gather_load_fs_input_info(const nir_shader *nir, const nir_intrinsic_instr *intr
{
const nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
const unsigned location = io_sem.location;
const unsigned mapped_location = ac_nir_get_io_driver_location(nir, io_sem.location, true);
const unsigned mapped_location = nir_intrinsic_base(intrin);
const unsigned attrib_count = io_sem.num_slots;
const unsigned component = nir_intrinsic_component(intrin);

View file

@ -90,6 +90,8 @@ void si_get_shader_variant_info(struct si_shader *shader,
nir_divergence_analysis(nir);
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS(_, nir, ac_nir_assign_fs_input_locations);
/* Since flat+convergent and non-flat components can occur in the same vec4, start with
* all PS inputs as flat and change them to smooth when we find a component that's
* interpolated.
@ -183,7 +185,7 @@ void si_get_shader_variant_info(struct si_shader *shader,
shader->info.uses_vmem_load_other = true;
} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
unsigned index = ac_nir_get_io_driver_location(nir, sem.location, true);
unsigned index = nir_intrinsic_base(intr);
assert(sem.num_slots == 1);
shader->info.num_ps_inputs = MAX2(shader->info.num_ps_inputs, index + 1);