nir/opt_vectorize_io: work around a 16-bit IO bug for RADV

If nir_opt_vectorize_io isn't called, 16-bit IO is broken.
This is a workaround to keep RADV working and consume incorrect NIR
while other drivers consume correct NIR.

Hopefully this will be removed ASAP.

Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35315>
This commit is contained in:
Marek Olšák 2025-06-08 19:54:21 -04:00 committed by Marge Bot
parent 6e9e9c9f0c
commit 0cbcb72869
3 changed files with 25 additions and 5 deletions

View file

@ -64,7 +64,7 @@ get_nir_options_for_stage(struct radv_physical_device *pdev, gl_shader_stage sta
options->max_unroll_iterations = 32;
options->max_unroll_iterations_aggressive = 128;
options->lower_doubles_options = nir_lower_drcp | nir_lower_dsqrt | nir_lower_drsq | nir_lower_ddiv;
options->io_options |= nir_io_mediump_is_32bit;
options->io_options |= nir_io_mediump_is_32bit | nir_io_radv_intrinsic_component_workaround;
options->varying_expression_max_cost = ac_nir_varying_expression_max_cost;
}

View file

@ -131,6 +131,13 @@ typedef enum {
vectorize_the_rest,
} nir_vectorize_op_step;
/**
 * Tell whether the shader being built asks for the RADV component
 * workaround, i.e. whether nir_io_radv_intrinsic_component_workaround is
 * set in the io_options of the shader's compiler options.
 */
static bool
apply_radv_workaround(nir_builder *b)
{
   const bool workaround_enabled =
      (b->shader->options->io_options &
       nir_io_radv_intrinsic_component_workaround) != 0;

   return workaround_enabled;
}
static void
vectorize_load(nir_intrinsic_instr *chan[8], unsigned start, unsigned count,
nir_vectorize_op_step step)
@ -158,7 +165,10 @@ vectorize_load(nir_intrinsic_instr *chan[8], unsigned start, unsigned count,
memcpy(new_intr->src, first->src,
nir_intrinsic_infos[first->intrinsic].num_srcs * sizeof(nir_src));
nir_intrinsic_copy_const_indices(new_intr, first);
nir_intrinsic_set_component(new_intr, start & 0x3); /* Bits 4..7 should map to 0..3 */
if (apply_radv_workaround(&b))
nir_intrinsic_set_component(new_intr, start);
else
nir_intrinsic_set_component(new_intr, start & 0x3); /* Bits 4..7 should map to 0..3 */
assert(start % 4 + count <= 4);
nir_io_semantics sem = nir_intrinsic_io_semantics(new_intr);
@ -308,15 +318,18 @@ vectorize_store(nir_intrinsic_instr *chan[8], unsigned start, unsigned count,
/* TODO: Merge names? */
nir_builder b = nir_builder_at(nir_before_instr(&last->instr));
/* Update the rest. */
nir_intrinsic_set_io_semantics(last, sem);
nir_intrinsic_set_component(last, start & 0x3); /* Bits 4..7 should map to 0..3 */
if (apply_radv_workaround(&b))
nir_intrinsic_set_component(last, start);
else
nir_intrinsic_set_component(last, start & 0x3); /* Bits 4..7 should map to 0..3 */
assert(start % 4 + count <= 4);
nir_intrinsic_set_write_mask(last, BITFIELD_MASK(count));
last->num_components = count;
nir_builder b = nir_builder_at(nir_before_instr(&last->instr));
/* Replace the stored scalar with the vector. */
if (step == merge_low_high_16_to_32) {
nir_def *value[4];

View file

@ -180,6 +180,13 @@ typedef enum {
*/
nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups = BITFIELD_BIT(9),
/**
* RADV expects accesses to the high 16 bits of outputs to set
* component >= 4. That's not legal in NIR, but RADV unfortunately relies
* on it because it's not validated.
*/
nir_io_radv_intrinsic_component_workaround = BITFIELD_BIT(10),
/* Options affecting the GLSL compiler or Gallium are below. */
/**