diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build
index b07322d64cf..1579abd8d61 100644
--- a/src/amd/common/meson.build
+++ b/src/amd/common/meson.build
@@ -151,6 +151,7 @@ amd_common_files = files(
   'ac_video_dec.c',
   'nir/ac_nir.c',
   'nir/ac_nir.h',
+  'nir/ac_nir_assign_io_locations.c',
   'nir/ac_nir_helpers.h',
   'nir/ac_nir_opt_outputs.c',
   'nir/ac_nir_cull.c',
diff --git a/src/amd/common/nir/ac_nir.c b/src/amd/common/nir/ac_nir.c
index 581c97a4041..f66984b9dcc 100644
--- a/src/amd/common/nir/ac_nir.c
+++ b/src/amd/common/nir/ac_nir.c
@@ -1082,131 +1082,3 @@ ac_nir_opt_vectorize_cb(const nir_instr *instr, const void *data)
 
    return target_width;
 }
-
-/* The IO driver location is computed from shader_info masks using a prefix bitcount.
- * Used by FS inputs, and radeonsi+LLVM also uses this for LS outputs to VGPRs and FS outputs.
- *
- * driver_location == nir_intrinsic_base == nir_variable::data::driver_location.
- */
-unsigned
-ac_nir_get_io_driver_location(const nir_shader *nir, unsigned location, bool is_input)
-{
-   assert((nir->info.stage == MESA_SHADER_VERTEX && !is_input) ||
-          nir->info.stage == MESA_SHADER_FRAGMENT);
-   /* All "read" bits should also be set in "written" bits. */
-   assert(!(nir->info.outputs_read & ~nir->info.outputs_written));
-   assert(!(nir->info.outputs_read_16bit & nir->info.outputs_written_16bit));
-
-   /* Per-vertex masks. */
-   uint64_t mask = is_input ? nir->info.inputs_read : nir->info.outputs_written;
-   uint16_t mask16 = is_input ? nir->info.inputs_read_16bit : nir->info.outputs_written_16bit;
-   uint64_t back_color_mask = 0;
-
-   /* Handle FS outputs first. */
-   if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_input) {
-      assert(mask & BITFIELD64_BIT(location));
-      return util_bitcount64(mask & BITFIELD64_MASK(location));
-   }
-
-   /* Per-primitive masks. */
-   uint64_t mask_maybe_per_prim = 0;
-   uint64_t mask_per_prim = 0;
-   uint16_t mask16_per_prim = 0;
-   bool maybe_per_primitive = false;
-   bool per_primitive = false;
-
-   /* Fragment shader input locations must be in this order: per-vertex, maybe per-prim, per-prim. */
-   if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input) {
-      if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_COLOR0_AMD))
-         mask |= VARYING_BIT_COL0;
-      if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_COLOR1_AMD))
-         mask |= VARYING_BIT_COL1;
-
-      /* TODO: back colors are broken with mesh shaders because they are always after per-primitive
-       * inputs.
-       */
-      back_color_mask = mask & (VARYING_BIT_BFC0 | VARYING_BIT_BFC1);
-
-      assert(!(mask & VARYING_BIT_LAYER)); /* This should have been lowered. */
-      mask_maybe_per_prim = mask & (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);
-      mask_per_prim = mask & nir->info.per_primitive_inputs &
-                      ~(VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);
-
-      /* TODO: Add shader_info::per_primitive_inputs_16bit for separate GLES mesh shaders + mediump. */
-      /*mask16_per_prim = mask & nir->info.per_primitive_inputs_16bit;*/
-
-      /* Make the masks disjoint. */
-      mask &= ~(back_color_mask | mask_maybe_per_prim | mask_per_prim);
-      mask16 &= ~mask16_per_prim;
-
-      if (location == VARYING_SLOT_PRIMITIVE_ID || location == VARYING_SLOT_VIEWPORT) {
-         maybe_per_primitive = true;
-      } else if (location >= VARYING_SLOT_VAR0_16BIT) {
-         /* TODO: Add shader_info::per_primitive_inputs_16bit. */
-         /*per_primitive = nir->info.per_primitive_inputs_16bit &
-                           BITFIELD_BIT(location - VARYING_SLOT_VAR0_16BIT);*/
-      } else {
-         assert(location <= VARYING_SLOT_VAR31);
-         per_primitive = nir->info.per_primitive_inputs & BITFIELD64_BIT(location);
-      }
-   }
-
-   enum {
-      MASK,
-      MASK16,
-      MASK_MAYBE_PRIM_PRIM, /* always after per-vertex varyings (NUM_INTERP) */
-      MASK_PER_PRIM,
-      MASK16_PER_PRIM,
-      BACK_COLOR_MASK, /* always after all other varyings */
-      PARAM_GEN_MASK, /* PARAM_GEN is loaded at location (NUM_INTERP + NUM_PRIM_INTERP). */
-   };
-
-   /* We'll compute a prefix bitcount from this bitset. */
-   const uint64_t masks[] = {
-      [MASK] = mask,
-      [MASK16] = mask16,
-      [MASK_MAYBE_PRIM_PRIM] = mask_maybe_per_prim,
-      [MASK_PER_PRIM] = mask_per_prim,
-      [MASK16_PER_PRIM] = mask16_per_prim,
-      [BACK_COLOR_MASK] = back_color_mask,
-      [PARAM_GEN_MASK] = 0,
-   };
-   unsigned location_mask_index;
-
-   /* Assign a mask index to the location, and make "location" relative to the beginning of its mask. */
-   if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input && location == VARYING_SLOT_PARAM_GEN_AMD)
-      location_mask_index = PARAM_GEN_MASK;
-   else if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input &&
-            (location == VARYING_SLOT_BFC0 || location == VARYING_SLOT_BFC1))
-      location_mask_index = BACK_COLOR_MASK;
-   else if (per_primitive)
-      location_mask_index = location >= VARYING_SLOT_VAR0_16BIT ? MASK16_PER_PRIM : MASK_PER_PRIM;
-   else if (maybe_per_primitive)
-      location_mask_index = MASK_MAYBE_PRIM_PRIM;
-   else
-      location_mask_index = location >= VARYING_SLOT_VAR0_16BIT ? MASK16 : MASK;
-
-   /* Make "location" relative to its mask. */
-   if (location >= VARYING_SLOT_VAR0_16BIT)
-      location -= VARYING_SLOT_VAR0_16BIT;
-
-   /* Compute the prefix bitcount. */
-   unsigned index = 0;
-
-   for (unsigned i = 0; i < location_mask_index; i++)
-      index += util_bitcount64(masks[i]);
-
-   index += util_bitcount64(masks[location_mask_index] & BITFIELD64_MASK(location));
-
-#if 0 /* useful debug code */
-   printf("location_mask_index=%u\n", location_mask_index);
-   for (unsigned i = 0; i <= location_mask_index; i++)
-      printf("mask[%u] = 0x%lx\n", i, masks[i]);
-
-   printf("index=%u, location=%u, %s, num=%u\n",
-          index, location, is_input ? "input" : "output",
-          is_input ? nir->num_inputs : nir->num_outputs);
-#endif
-
-   return index;
-}
diff --git a/src/amd/common/nir/ac_nir.h b/src/amd/common/nir/ac_nir.h
index b7e6c313e2a..2ffe9e83838 100644
--- a/src/amd/common/nir/ac_nir.h
+++ b/src/amd/common/nir/ac_nir.h
@@ -475,6 +475,9 @@ ac_nir_opt_vectorize_cb(const nir_instr *instr, const void *data);
 unsigned
 ac_nir_get_io_driver_location(const nir_shader *nir, unsigned location, bool is_input);
 
+bool
+ac_nir_assign_fs_input_locations(nir_shader *nir);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/common/nir/ac_nir_assign_io_locations.c b/src/amd/common/nir/ac_nir_assign_io_locations.c
new file mode 100644
index 00000000000..514d3354197
--- /dev/null
+++ b/src/amd/common/nir/ac_nir_assign_io_locations.c
@@ -0,0 +1,169 @@
+/* Copyright © 2026 Valve Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "ac_nir.h"
+#include "nir_builder.h"
+
+/* The IO driver location is computed from shader_info masks using a prefix bitcount.
+ * Used by FS inputs, and radeonsi+LLVM also uses this for LS outputs to VGPRs and FS outputs.
+ *
+ * driver_location == nir_intrinsic_base == nir_variable::data::driver_location.
+ *
+ * "nir" must be a fragment shader, or a vertex shader if "is_input" is false.
+ * "location" is the IO semantic location (nir_io_semantics::location), and "is_input"
+ * selects between the input and output masks of shader_info.
+ * The return value is the number of enabled slots that are ordered before "location".
+ */
+unsigned
+ac_nir_get_io_driver_location(const nir_shader *nir, unsigned location, bool is_input)
+{
+   assert((nir->info.stage == MESA_SHADER_VERTEX && !is_input) ||
+          nir->info.stage == MESA_SHADER_FRAGMENT);
+   /* All "read" bits should also be set in "written" bits. */
+   assert(!(nir->info.outputs_read & ~nir->info.outputs_written));
+   assert(!(nir->info.outputs_read_16bit & ~nir->info.outputs_written_16bit));
+
+   /* Per-vertex masks. */
+   uint64_t mask = is_input ? nir->info.inputs_read : nir->info.outputs_written;
+   uint16_t mask16 = is_input ? nir->info.inputs_read_16bit : nir->info.outputs_written_16bit;
+   uint64_t back_color_mask = 0;
+
+   /* Handle FS outputs first. */
+   if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_input) {
+      assert(mask & BITFIELD64_BIT(location));
+      return util_bitcount64(mask & BITFIELD64_MASK(location));
+   }
+
+   /* Per-primitive masks. */
+   uint64_t mask_maybe_per_prim = 0;
+   uint64_t mask_per_prim = 0;
+   uint16_t mask16_per_prim = 0;
+   bool maybe_per_primitive = false;
+   bool per_primitive = false;
+
+   /* Fragment shader input locations must be in this order: per-vertex, maybe per-prim, per-prim. */
+   if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input) {
+      if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_COLOR0_AMD))
+         mask |= VARYING_BIT_COL0;
+      if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_COLOR1_AMD))
+         mask |= VARYING_BIT_COL1;
+
+      /* TODO: back colors are broken with mesh shaders because they are always after per-primitive
+       * inputs.
+       */
+      back_color_mask = mask & (VARYING_BIT_BFC0 | VARYING_BIT_BFC1);
+
+      assert(!(mask & VARYING_BIT_LAYER)); /* This should have been lowered. */
+      mask_maybe_per_prim = mask & (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);
+      mask_per_prim = mask & nir->info.per_primitive_inputs &
+                      ~(VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);
+
+      /* TODO: Add shader_info::per_primitive_inputs_16bit for separate GLES mesh shaders + mediump. */
+      /*mask16_per_prim = mask & nir->info.per_primitive_inputs_16bit;*/
+
+      /* Make the masks disjoint. */
+      mask &= ~(back_color_mask | mask_maybe_per_prim | mask_per_prim);
+      mask16 &= ~mask16_per_prim;
+
+      if (location == VARYING_SLOT_PRIMITIVE_ID || location == VARYING_SLOT_VIEWPORT) {
+         maybe_per_primitive = true;
+      } else if (location >= VARYING_SLOT_VAR0_16BIT) {
+         /* TODO: Add shader_info::per_primitive_inputs_16bit. */
+         /*per_primitive = nir->info.per_primitive_inputs_16bit &
+                           BITFIELD_BIT(location - VARYING_SLOT_VAR0_16BIT);*/
+      } else {
+         assert(location <= VARYING_SLOT_VAR31);
+         per_primitive = nir->info.per_primitive_inputs & BITFIELD64_BIT(location);
+      }
+   }
+
+   enum {
+      MASK,
+      MASK16,
+      MASK_MAYBE_PRIM_PRIM, /* always after per-vertex varyings (NUM_INTERP) */
+      MASK_PER_PRIM,
+      MASK16_PER_PRIM,
+      BACK_COLOR_MASK, /* always after all other varyings */
+      PARAM_GEN_MASK, /* PARAM_GEN is loaded at location (NUM_INTERP + NUM_PRIM_INTERP). */
+   };
+
+   /* We'll compute a prefix bitcount from this bitset. */
+   const uint64_t masks[] = {
+      [MASK] = mask,
+      [MASK16] = mask16,
+      [MASK_MAYBE_PRIM_PRIM] = mask_maybe_per_prim,
+      [MASK_PER_PRIM] = mask_per_prim,
+      [MASK16_PER_PRIM] = mask16_per_prim,
+      [BACK_COLOR_MASK] = back_color_mask,
+      [PARAM_GEN_MASK] = 0,
+   };
+   unsigned location_mask_index;
+
+   /* Assign a mask index to the location, and make "location" relative to the beginning of its mask. */
+   if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input && location == VARYING_SLOT_PARAM_GEN_AMD)
+      location_mask_index = PARAM_GEN_MASK;
+   else if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input &&
+            (location == VARYING_SLOT_BFC0 || location == VARYING_SLOT_BFC1))
+      location_mask_index = BACK_COLOR_MASK;
+   else if (per_primitive)
+      location_mask_index = location >= VARYING_SLOT_VAR0_16BIT ? MASK16_PER_PRIM : MASK_PER_PRIM;
+   else if (maybe_per_primitive)
+      location_mask_index = MASK_MAYBE_PRIM_PRIM;
+   else
+      location_mask_index = location >= VARYING_SLOT_VAR0_16BIT ? MASK16 : MASK;
+
+   /* Make "location" relative to its mask. */
+   if (location >= VARYING_SLOT_VAR0_16BIT)
+      location -= VARYING_SLOT_VAR0_16BIT;
+
+   /* Compute the prefix bitcount. */
+   unsigned index = 0;
+
+   for (unsigned i = 0; i < location_mask_index; i++)
+      index += util_bitcount64(masks[i]);
+
+   index += util_bitcount64(masks[location_mask_index] & BITFIELD64_MASK(location));
+
+#if 0 /* useful debug code */
+   printf("location_mask_index=%u\n", location_mask_index);
+   for (unsigned i = 0; i <= location_mask_index; i++)
+      printf("mask[%u] = 0x%lx\n", i, masks[i]);
+
+   printf("index=%u, location=%u, %s, num=%u\n",
+          index, location, is_input ? "input" : "output",
+          is_input ? nir->num_inputs : nir->num_outputs);
+#endif
+
+   return index;
+}
+
+/* Per-intrinsic callback: set the base of an input load to its computed driver location. */
+static bool
+assign_fs_input_location(nir_builder *b, nir_intrinsic_instr *intr, void *_unused)
+{
+   if (nir_is_input_load(intr)) {
+      unsigned loc =
+         ac_nir_get_io_driver_location(b->shader,
+                                       nir_intrinsic_io_semantics(intr).location, true);
+      nir_intrinsic_set_base(intr, loc);
+      return true;
+   }
+
+   return false;
+}
+
+/* Set "bases" of FS input loads to their final SPI_PS_INPUT_CNTL location.
+ *
+ * This is used by ACO and ac_nir_to_llvm to set the PS input location in ds_param_load
+ * and v_interp instructions, and to gather PS input info in drivers.
+ *
+ * We don't set bases in any other IO intrinsics.
+ */
+bool
+ac_nir_assign_fs_input_locations(nir_shader *nir)
+{
+   assert(nir->info.stage == MESA_SHADER_FRAGMENT);
+
+   return nir_shader_intrinsics_pass(nir, assign_fs_input_location,
+                                     nir_metadata_all, NULL);
+}
diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
index d4773d0a6d6..d52b23a290f 100644
--- a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
+++ b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
@@ -1334,8 +1334,7 @@ visit_load_interpolated_input(isel_context* ctx, nir_intrinsic_instr* instr)
 {
    Temp dst = get_ssa_temp(ctx, &instr->def);
    Temp coords = get_ssa_temp(ctx, instr->src[0].ssa);
-   unsigned idx =
-      ac_nir_get_io_driver_location(ctx->shader, nir_intrinsic_io_semantics(instr).location, true);
+   unsigned idx = nir_intrinsic_base(instr);
    unsigned component = nir_intrinsic_component(instr);
    bool high_16bits = nir_intrinsic_io_semantics(instr).high_16bits;
    Temp prim_mask = get_arg(ctx, ctx->args->prim_mask);
@@ -1472,7 +1471,7 @@ visit_load_fs_input(isel_context* ctx, nir_intrinsic_instr* instr)
    Temp prim_mask = get_arg(ctx, ctx->args->prim_mask);
 
    nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
-   unsigned idx = ac_nir_get_io_driver_location(ctx->shader, sem.location, true);
+   unsigned idx = nir_intrinsic_base(instr);
    unsigned component = nir_intrinsic_component(instr);
    bool high_16bits = sem.high_16bits;
    unsigned vertex_id = 0; /* P0 */
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 842152ea72a..a10d898ea69 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -2633,8 +2633,7 @@ static LLVMValueRef visit_load_input(struct ac_nir_context *ctx, nir_intrinsic_i
    if (instr->intrinsic == nir_intrinsic_load_input_vertex)
       vertex_id = nir_src_as_uint(instr->src[0]);
 
-   nir_shader *nir = nir_cf_node_get_function(&instr->instr.block->cf_node)->function->shader;
-   unsigned base = ac_nir_get_io_driver_location(nir, sem.location, true);
+   unsigned base = nir_intrinsic_base(instr);
    LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, base, false);
    LLVMTypeRef dest_type = get_def_type(ctx, &instr->def);
    LLVMValueRef values[8];
@@ -2823,9 +2822,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
       assert(offset[0].i32 == 0);
 
       LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
-      nir_shader *nir = nir_cf_node_get_function(&instr->instr.block->cf_node)->function->shader;
-      unsigned index =
-         ac_nir_get_io_driver_location(nir, nir_intrinsic_io_semantics(instr).location, true);
+      unsigned index = nir_intrinsic_base(instr);
       unsigned component = nir_intrinsic_component(instr);
       result = load_interpolated_input(ctx, interp_param, index, component,
                                        instr->def.num_components, instr->def.bit_size,
diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c
index 060e221e516..73b29439551 100644
--- a/src/amd/vulkan/radv_pipeline_graphics.c
+++ b/src/amd/vulkan/radv_pipeline_graphics.c
@@ -1982,6 +1982,9 @@ radv_fill_shader_info(const struct radv_compiler_info *compiler_info, const enum
          consider_force_vrs = radv_consider_force_vrs(gfx_state, &stages[i], &stages[MESA_SHADER_FRAGMENT]);
       }
 
+      if (i == MESA_SHADER_FRAGMENT)
+         NIR_PASS(_, stages[i].nir, ac_nir_assign_fs_input_locations);
+
       radv_nir_shader_info_pass(compiler_info, stages[i].nir, &stages[i].layout, &stages[i].key, gfx_state,
                                 pipeline_type, consider_force_vrs, &stages[i].info);
    }
diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c
index 1df39bfb780..6bcfcd3ccda 100644
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -69,7 +69,7 @@ gather_load_fs_input_info(const nir_shader *nir, const nir_intrinsic_instr *intr
 {
    const nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
    const unsigned location = io_sem.location;
-   const unsigned mapped_location = ac_nir_get_io_driver_location(nir, io_sem.location, true);
+   const unsigned mapped_location = nir_intrinsic_base(intrin);
    const unsigned attrib_count = io_sem.num_slots;
    const unsigned component = nir_intrinsic_component(intrin);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_variant_info.c b/src/gallium/drivers/radeonsi/si_shader_variant_info.c
index ffb3396d103..984bcdf46cf 100644
--- a/src/gallium/drivers/radeonsi/si_shader_variant_info.c
+++ b/src/gallium/drivers/radeonsi/si_shader_variant_info.c
@@ -90,6 +90,8 @@ void si_get_shader_variant_info(struct si_shader *shader,
    nir_divergence_analysis(nir);
 
    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+      NIR_PASS(_, nir, ac_nir_assign_fs_input_locations);
+
       /* Since flat+convergent and non-flat components can occur in the same vec4, start with
        * all PS inputs as flat and change them to smooth when we find a component that's
        * interpolated.
@@ -183,7 +185,7 @@ void si_get_shader_variant_info(struct si_shader *shader,
                shader->info.uses_vmem_load_other = true;
             } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
                nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
-               unsigned index = ac_nir_get_io_driver_location(nir, sem.location, true);
+               unsigned index = nir_intrinsic_base(intr);
                assert(sem.num_slots == 1);
 
                shader->info.num_ps_inputs = MAX2(shader->info.num_ps_inputs, index + 1);