mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 22:28:04 +02:00
ac/nir: add ac_nir_assign_fs_input_locations to set PS input locations in stone
No intended functional change. This prevents possible breakage due to DCE removing input loads followed by nir_shader_gather_info updating input masks and changing the result of ac_nir_get_io_driver_location after PS input register contents are already determined. Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41175>
This commit is contained in:
parent
4b0a0ed7b6
commit
0684976de8
9 changed files with 179 additions and 138 deletions
|
|
@ -151,6 +151,7 @@ amd_common_files = files(
|
|||
'ac_video_dec.c',
|
||||
'nir/ac_nir.c',
|
||||
'nir/ac_nir.h',
|
||||
'nir/ac_nir_assign_io_locations.c',
|
||||
'nir/ac_nir_helpers.h',
|
||||
'nir/ac_nir_opt_outputs.c',
|
||||
'nir/ac_nir_cull.c',
|
||||
|
|
|
|||
|
|
@ -1082,131 +1082,3 @@ ac_nir_opt_vectorize_cb(const nir_instr *instr, const void *data)
|
|||
|
||||
return target_width;
|
||||
}
|
||||
|
||||
/* The IO driver location is computed from shader_info masks using a prefix bitcount.
|
||||
* Used by FS inputs, and radeonsi+LLVM also uses this for LS outputs to VGPRs and FS outputs.
|
||||
*
|
||||
* driver_location == nir_intrinsic_base == nir_variable::data::driver_location.
|
||||
*/
|
||||
unsigned
|
||||
ac_nir_get_io_driver_location(const nir_shader *nir, unsigned location, bool is_input)
|
||||
{
|
||||
assert((nir->info.stage == MESA_SHADER_VERTEX && !is_input) ||
|
||||
nir->info.stage == MESA_SHADER_FRAGMENT);
|
||||
/* All "read" bits should also be set in "written" bits. */
|
||||
assert(!(nir->info.outputs_read & ~nir->info.outputs_written));
|
||||
assert(!(nir->info.outputs_read_16bit & nir->info.outputs_written_16bit));
|
||||
|
||||
/* Per-vertex masks. */
|
||||
uint64_t mask = is_input ? nir->info.inputs_read : nir->info.outputs_written;
|
||||
uint16_t mask16 = is_input ? nir->info.inputs_read_16bit : nir->info.outputs_written_16bit;
|
||||
uint64_t back_color_mask = 0;
|
||||
|
||||
/* Handle FS outputs first. */
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_input) {
|
||||
assert(mask & BITFIELD64_BIT(location));
|
||||
return util_bitcount64(mask & BITFIELD64_MASK(location));
|
||||
}
|
||||
|
||||
/* Per-primitive masks. */
|
||||
uint64_t mask_maybe_per_prim = 0;
|
||||
uint64_t mask_per_prim = 0;
|
||||
uint16_t mask16_per_prim = 0;
|
||||
bool maybe_per_primitive = false;
|
||||
bool per_primitive = false;
|
||||
|
||||
/* Fragment shader input locations must be in this order: per-vertex, maybe per-prim, per-prim. */
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input) {
|
||||
if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_COLOR0_AMD))
|
||||
mask |= VARYING_BIT_COL0;
|
||||
if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_COLOR1_AMD))
|
||||
mask |= VARYING_BIT_COL1;
|
||||
|
||||
/* TODO: back colors are broken with mesh shaders because they are always after per-primitive
|
||||
* inputs.
|
||||
*/
|
||||
back_color_mask = mask & (VARYING_BIT_BFC0 | VARYING_BIT_BFC1);
|
||||
|
||||
assert(!(mask & VARYING_BIT_LAYER)); /* This should have been lowered. */
|
||||
mask_maybe_per_prim = mask & (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);
|
||||
mask_per_prim = mask & nir->info.per_primitive_inputs &
|
||||
~(VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);
|
||||
|
||||
/* TODO: Add shader_info::per_primitive_inputs_16bit for separate GLES mesh shaders + mediump. */
|
||||
/*mask16_per_prim = mask & nir->info.per_primitive_inputs_16bit;*/
|
||||
|
||||
/* Make the masks disjoint. */
|
||||
mask &= ~(back_color_mask | mask_maybe_per_prim | mask_per_prim);
|
||||
mask16 &= ~mask16_per_prim;
|
||||
|
||||
if (location == VARYING_SLOT_PRIMITIVE_ID || location == VARYING_SLOT_VIEWPORT) {
|
||||
maybe_per_primitive = true;
|
||||
} else if (location >= VARYING_SLOT_VAR0_16BIT) {
|
||||
/* TODO: Add shader_info::per_primitive_inputs_16bit. */
|
||||
/*per_primitive = nir->info.per_primitive_inputs_16bit &
|
||||
BITFIELD_BIT(location - VARYING_SLOT_VAR0_16BIT);*/
|
||||
} else {
|
||||
assert(location <= VARYING_SLOT_VAR31);
|
||||
per_primitive = nir->info.per_primitive_inputs & BITFIELD64_BIT(location);
|
||||
}
|
||||
}
|
||||
|
||||
enum {
|
||||
MASK,
|
||||
MASK16,
|
||||
MASK_MAYBE_PRIM_PRIM, /* always after per-vertex varyings (NUM_INTERP) */
|
||||
MASK_PER_PRIM,
|
||||
MASK16_PER_PRIM,
|
||||
BACK_COLOR_MASK, /* always after all other varyings */
|
||||
PARAM_GEN_MASK, /* PARAM_GEN is loaded at location (NUM_INTERP + NUM_PRIM_INTERP). */
|
||||
};
|
||||
|
||||
/* We'll compute a prefix bitcount from this bitset. */
|
||||
const uint64_t masks[] = {
|
||||
[MASK] = mask,
|
||||
[MASK16] = mask16,
|
||||
[MASK_MAYBE_PRIM_PRIM] = mask_maybe_per_prim,
|
||||
[MASK_PER_PRIM] = mask_per_prim,
|
||||
[MASK16_PER_PRIM] = mask16_per_prim,
|
||||
[BACK_COLOR_MASK] = back_color_mask,
|
||||
[PARAM_GEN_MASK] = 0,
|
||||
};
|
||||
unsigned location_mask_index;
|
||||
|
||||
/* Assign a mask index to the location, and make "location" relative to the beginning of its mask. */
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input && location == VARYING_SLOT_PARAM_GEN_AMD)
|
||||
location_mask_index = PARAM_GEN_MASK;
|
||||
else if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input &&
|
||||
(location == VARYING_SLOT_BFC0 || location == VARYING_SLOT_BFC1))
|
||||
location_mask_index = BACK_COLOR_MASK;
|
||||
else if (per_primitive)
|
||||
location_mask_index = location >= VARYING_SLOT_VAR0_16BIT ? MASK16_PER_PRIM : MASK_PER_PRIM;
|
||||
else if (maybe_per_primitive)
|
||||
location_mask_index = MASK_MAYBE_PRIM_PRIM;
|
||||
else
|
||||
location_mask_index = location >= VARYING_SLOT_VAR0_16BIT ? MASK16 : MASK;
|
||||
|
||||
/* Make "location" relative to its mask. */
|
||||
if (location >= VARYING_SLOT_VAR0_16BIT)
|
||||
location -= VARYING_SLOT_VAR0_16BIT;
|
||||
|
||||
/* Compute the prefix bitcount. */
|
||||
unsigned index = 0;
|
||||
|
||||
for (unsigned i = 0; i < location_mask_index; i++)
|
||||
index += util_bitcount64(masks[i]);
|
||||
|
||||
index += util_bitcount64(masks[location_mask_index] & BITFIELD64_MASK(location));
|
||||
|
||||
#if 0 /* useful debug code */
|
||||
printf("location_mask_index=%u\n", location_mask_index);
|
||||
for (unsigned i = 0; i <= location_mask_index; i++)
|
||||
printf("mask[%u] = 0x%lx\n", i, masks[i]);
|
||||
|
||||
printf("index=%u, location=%u, %s, num=%u\n",
|
||||
index, location, is_input ? "input" : "output",
|
||||
is_input ? nir->num_inputs : nir->num_outputs);
|
||||
#endif
|
||||
|
||||
return index;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -475,6 +475,9 @@ ac_nir_opt_vectorize_cb(const nir_instr *instr, const void *data);
|
|||
unsigned
|
||||
ac_nir_get_io_driver_location(const nir_shader *nir, unsigned location, bool is_input);
|
||||
|
||||
bool
|
||||
ac_nir_assign_fs_input_locations(nir_shader *nir);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
164
src/amd/common/nir/ac_nir_assign_io_locations.c
Normal file
164
src/amd/common/nir/ac_nir_assign_io_locations.c
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
/* Copyright © 2026 Valve Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "ac_nir.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
/* The IO driver location is computed from shader_info masks using a prefix bitcount.
|
||||
* Used by FS inputs, and radeonsi+LLVM also uses this for LS outputs to VGPRs and FS outputs.
|
||||
*
|
||||
* driver_location == nir_intrinsic_base == nir_variable::data::driver_location.
|
||||
*/
|
||||
unsigned
|
||||
ac_nir_get_io_driver_location(const nir_shader *nir, unsigned location, bool is_input)
|
||||
{
|
||||
assert((nir->info.stage == MESA_SHADER_VERTEX && !is_input) ||
|
||||
nir->info.stage == MESA_SHADER_FRAGMENT);
|
||||
/* All "read" bits should also be set in "written" bits. */
|
||||
assert(!(nir->info.outputs_read & ~nir->info.outputs_written));
|
||||
assert(!(nir->info.outputs_read_16bit & nir->info.outputs_written_16bit));
|
||||
|
||||
/* Per-vertex masks. */
|
||||
uint64_t mask = is_input ? nir->info.inputs_read : nir->info.outputs_written;
|
||||
uint16_t mask16 = is_input ? nir->info.inputs_read_16bit : nir->info.outputs_written_16bit;
|
||||
uint64_t back_color_mask = 0;
|
||||
|
||||
/* Handle FS outputs first. */
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_input) {
|
||||
assert(mask & BITFIELD64_BIT(location));
|
||||
return util_bitcount64(mask & BITFIELD64_MASK(location));
|
||||
}
|
||||
|
||||
/* Per-primitive masks. */
|
||||
uint64_t mask_maybe_per_prim = 0;
|
||||
uint64_t mask_per_prim = 0;
|
||||
uint16_t mask16_per_prim = 0;
|
||||
bool maybe_per_primitive = false;
|
||||
bool per_primitive = false;
|
||||
|
||||
/* Fragment shader input locations must be in this order: per-vertex, maybe per-prim, per-prim. */
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input) {
|
||||
if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_COLOR0_AMD))
|
||||
mask |= VARYING_BIT_COL0;
|
||||
if (BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_COLOR1_AMD))
|
||||
mask |= VARYING_BIT_COL1;
|
||||
|
||||
/* TODO: back colors are broken with mesh shaders because they are always after per-primitive
|
||||
* inputs.
|
||||
*/
|
||||
back_color_mask = mask & (VARYING_BIT_BFC0 | VARYING_BIT_BFC1);
|
||||
|
||||
assert(!(mask & VARYING_BIT_LAYER)); /* This should have been lowered. */
|
||||
mask_maybe_per_prim = mask & (VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);
|
||||
mask_per_prim = mask & nir->info.per_primitive_inputs &
|
||||
~(VARYING_BIT_PRIMITIVE_ID | VARYING_BIT_VIEWPORT);
|
||||
|
||||
/* TODO: Add shader_info::per_primitive_inputs_16bit for separate GLES mesh shaders + mediump. */
|
||||
/*mask16_per_prim = mask & nir->info.per_primitive_inputs_16bit;*/
|
||||
|
||||
/* Make the masks disjoint. */
|
||||
mask &= ~(back_color_mask | mask_maybe_per_prim | mask_per_prim);
|
||||
mask16 &= ~mask16_per_prim;
|
||||
|
||||
if (location == VARYING_SLOT_PRIMITIVE_ID || location == VARYING_SLOT_VIEWPORT) {
|
||||
maybe_per_primitive = true;
|
||||
} else if (location >= VARYING_SLOT_VAR0_16BIT) {
|
||||
/* TODO: Add shader_info::per_primitive_inputs_16bit. */
|
||||
/*per_primitive = nir->info.per_primitive_inputs_16bit &
|
||||
BITFIELD_BIT(location - VARYING_SLOT_VAR0_16BIT);*/
|
||||
} else {
|
||||
assert(location <= VARYING_SLOT_VAR31);
|
||||
per_primitive = nir->info.per_primitive_inputs & BITFIELD64_BIT(location);
|
||||
}
|
||||
}
|
||||
|
||||
enum {
|
||||
MASK,
|
||||
MASK16,
|
||||
MASK_MAYBE_PRIM_PRIM, /* always after per-vertex varyings (NUM_INTERP) */
|
||||
MASK_PER_PRIM,
|
||||
MASK16_PER_PRIM,
|
||||
BACK_COLOR_MASK, /* always after all other varyings */
|
||||
PARAM_GEN_MASK, /* PARAM_GEN is loaded at location (NUM_INTERP + NUM_PRIM_INTERP). */
|
||||
};
|
||||
|
||||
/* We'll compute a prefix bitcount from this bitset. */
|
||||
const uint64_t masks[] = {
|
||||
[MASK] = mask,
|
||||
[MASK16] = mask16,
|
||||
[MASK_MAYBE_PRIM_PRIM] = mask_maybe_per_prim,
|
||||
[MASK_PER_PRIM] = mask_per_prim,
|
||||
[MASK16_PER_PRIM] = mask16_per_prim,
|
||||
[BACK_COLOR_MASK] = back_color_mask,
|
||||
[PARAM_GEN_MASK] = 0,
|
||||
};
|
||||
unsigned location_mask_index;
|
||||
|
||||
/* Assign a mask index to the location, and make "location" relative to the beginning of its mask. */
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input && location == VARYING_SLOT_PARAM_GEN_AMD)
|
||||
location_mask_index = PARAM_GEN_MASK;
|
||||
else if (nir->info.stage == MESA_SHADER_FRAGMENT && is_input &&
|
||||
(location == VARYING_SLOT_BFC0 || location == VARYING_SLOT_BFC1))
|
||||
location_mask_index = BACK_COLOR_MASK;
|
||||
else if (per_primitive)
|
||||
location_mask_index = location >= VARYING_SLOT_VAR0_16BIT ? MASK16_PER_PRIM : MASK_PER_PRIM;
|
||||
else if (maybe_per_primitive)
|
||||
location_mask_index = MASK_MAYBE_PRIM_PRIM;
|
||||
else
|
||||
location_mask_index = location >= VARYING_SLOT_VAR0_16BIT ? MASK16 : MASK;
|
||||
|
||||
/* Make "location" relative to its mask. */
|
||||
if (location >= VARYING_SLOT_VAR0_16BIT)
|
||||
location -= VARYING_SLOT_VAR0_16BIT;
|
||||
|
||||
/* Compute the prefix bitcount. */
|
||||
unsigned index = 0;
|
||||
|
||||
for (unsigned i = 0; i < location_mask_index; i++)
|
||||
index += util_bitcount64(masks[i]);
|
||||
|
||||
index += util_bitcount64(masks[location_mask_index] & BITFIELD64_MASK(location));
|
||||
|
||||
#if 0 /* useful debug code */
|
||||
printf("location_mask_index=%u\n", location_mask_index);
|
||||
for (unsigned i = 0; i <= location_mask_index; i++)
|
||||
printf("mask[%u] = 0x%lx\n", i, masks[i]);
|
||||
|
||||
printf("index=%u, location=%u, %s, num=%u\n",
|
||||
index, location, is_input ? "input" : "output",
|
||||
is_input ? nir->num_inputs : nir->num_outputs);
|
||||
#endif
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
static bool
|
||||
assign_fs_input_location(nir_builder *b, nir_intrinsic_instr *intr, void *_unused)
|
||||
{
|
||||
if (nir_is_input_load(intr)) {
|
||||
unsigned loc =
|
||||
ac_nir_get_io_driver_location(b->shader,
|
||||
nir_intrinsic_io_semantics(intr).location, true);
|
||||
nir_intrinsic_set_base(intr, loc);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Set "bases" of FS input loads to their final SPI_PS_INPUT_CNTL location.
|
||||
*
|
||||
* This is used by ACO and ac_nir_to_llvm to set the PS input location in ds_param_load
|
||||
* and v_interp instructions, and to gather PS input info in drivers.
|
||||
*
|
||||
* We don't set bases in any other IO intrinsics.
|
||||
*/
|
||||
bool
|
||||
ac_nir_assign_fs_input_locations(nir_shader *nir)
|
||||
{
|
||||
assert(nir->info.stage == MESA_SHADER_FRAGMENT);
|
||||
|
||||
return nir_shader_intrinsics_pass(nir, assign_fs_input_location,
|
||||
nir_metadata_all, NULL);
|
||||
}
|
||||
|
|
@ -1334,8 +1334,7 @@ visit_load_interpolated_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
{
|
||||
Temp dst = get_ssa_temp(ctx, &instr->def);
|
||||
Temp coords = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||
unsigned idx =
|
||||
ac_nir_get_io_driver_location(ctx->shader, nir_intrinsic_io_semantics(instr).location, true);
|
||||
unsigned idx = nir_intrinsic_base(instr);
|
||||
unsigned component = nir_intrinsic_component(instr);
|
||||
bool high_16bits = nir_intrinsic_io_semantics(instr).high_16bits;
|
||||
Temp prim_mask = get_arg(ctx, ctx->args->prim_mask);
|
||||
|
|
@ -1472,7 +1471,7 @@ visit_load_fs_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
Temp prim_mask = get_arg(ctx, ctx->args->prim_mask);
|
||||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(instr);
|
||||
unsigned idx = ac_nir_get_io_driver_location(ctx->shader, sem.location, true);
|
||||
unsigned idx = nir_intrinsic_base(instr);
|
||||
unsigned component = nir_intrinsic_component(instr);
|
||||
bool high_16bits = sem.high_16bits;
|
||||
unsigned vertex_id = 0; /* P0 */
|
||||
|
|
|
|||
|
|
@ -2633,8 +2633,7 @@ static LLVMValueRef visit_load_input(struct ac_nir_context *ctx, nir_intrinsic_i
|
|||
if (instr->intrinsic == nir_intrinsic_load_input_vertex)
|
||||
vertex_id = nir_src_as_uint(instr->src[0]);
|
||||
|
||||
nir_shader *nir = nir_cf_node_get_function(&instr->instr.block->cf_node)->function->shader;
|
||||
unsigned base = ac_nir_get_io_driver_location(nir, sem.location, true);
|
||||
unsigned base = nir_intrinsic_base(instr);
|
||||
LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, base, false);
|
||||
LLVMTypeRef dest_type = get_def_type(ctx, &instr->def);
|
||||
LLVMValueRef values[8];
|
||||
|
|
@ -2823,9 +2822,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
|||
assert(offset[0].i32 == 0);
|
||||
|
||||
LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
|
||||
nir_shader *nir = nir_cf_node_get_function(&instr->instr.block->cf_node)->function->shader;
|
||||
unsigned index =
|
||||
ac_nir_get_io_driver_location(nir, nir_intrinsic_io_semantics(instr).location, true);
|
||||
unsigned index = nir_intrinsic_base(instr);
|
||||
unsigned component = nir_intrinsic_component(instr);
|
||||
result = load_interpolated_input(ctx, interp_param, index, component,
|
||||
instr->def.num_components, instr->def.bit_size,
|
||||
|
|
|
|||
|
|
@ -1982,6 +1982,9 @@ radv_fill_shader_info(const struct radv_compiler_info *compiler_info, const enum
|
|||
consider_force_vrs = radv_consider_force_vrs(gfx_state, &stages[i], &stages[MESA_SHADER_FRAGMENT]);
|
||||
}
|
||||
|
||||
if (i == MESA_SHADER_FRAGMENT)
|
||||
NIR_PASS(_, stages[i].nir, ac_nir_assign_fs_input_locations);
|
||||
|
||||
radv_nir_shader_info_pass(compiler_info, stages[i].nir, &stages[i].layout, &stages[i].key, gfx_state,
|
||||
pipeline_type, consider_force_vrs, &stages[i].info);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ gather_load_fs_input_info(const nir_shader *nir, const nir_intrinsic_instr *intr
|
|||
{
|
||||
const nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
|
||||
const unsigned location = io_sem.location;
|
||||
const unsigned mapped_location = ac_nir_get_io_driver_location(nir, io_sem.location, true);
|
||||
const unsigned mapped_location = nir_intrinsic_base(intrin);
|
||||
const unsigned attrib_count = io_sem.num_slots;
|
||||
const unsigned component = nir_intrinsic_component(intrin);
|
||||
|
||||
|
|
|
|||
|
|
@ -90,6 +90,8 @@ void si_get_shader_variant_info(struct si_shader *shader,
|
|||
nir_divergence_analysis(nir);
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
NIR_PASS(_, nir, ac_nir_assign_fs_input_locations);
|
||||
|
||||
/* Since flat+convergent and non-flat components can occur in the same vec4, start with
|
||||
* all PS inputs as flat and change them to smooth when we find a component that's
|
||||
* interpolated.
|
||||
|
|
@ -183,7 +185,7 @@ void si_get_shader_variant_info(struct si_shader *shader,
|
|||
shader->info.uses_vmem_load_other = true;
|
||||
} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
unsigned index = ac_nir_get_io_driver_location(nir, sem.location, true);
|
||||
unsigned index = nir_intrinsic_base(intr);
|
||||
assert(sem.num_slots == 1);
|
||||
|
||||
shader->info.num_ps_inputs = MAX2(shader->info.num_ps_inputs, index + 1);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue