radeonsi: enable 16-bit mediump IO for PS outputs only, and VS->PS with env var

16-bit mediump IO has already been implemented and works for PS outputs.

The lowering callback needs 2 variants because we can't access
pipe_screen from it. The callback is rewritten to be more general.

We also need to do nir_clear_mediump_io_flag for any outputs we don't
lower because the mediump flag might prevent optimizations if it's not
cleared.

v2: fix si_nir_optim

Acked-by: Timur Kristóf <timur.kristof@gmail.com> (v1)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35529>
This commit is contained in:
Marek Olšák 2025-06-01 18:10:28 -04:00 committed by Marge Bot
parent 5a7ff54aaa
commit fbbf029529
4 changed files with 68 additions and 16 deletions

View file

@ -877,7 +877,8 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
(sscreen->info.family >= CHIP_GFX940 && !sscreen->info.has_graphics) ||
/* fma32 is too slow for gpu < gfx9, so apply the option only for gpu >= gfx9 */
(sscreen->info.gfx_level >= GFX9 && sscreen->options.force_use_fma32);
bool has_mediump = sscreen->info.gfx_level >= GFX9 && sscreen->options.mediump;
/* GFX8 has precision issues with 16-bit PS outputs. */
bool has_16bit_io = sscreen->info.gfx_level >= GFX9;
nir_shader_compiler_options *options = sscreen->nir_options;
ac_nir_set_options(&sscreen->info, !sscreen->use_aco, options);
@ -904,10 +905,14 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
* GFX8 has precision issues with this option.
*/
options->force_f2f16_rtz = sscreen->info.gfx_level >= GFX9;
options->io_options |= (!has_mediump ? nir_io_mediump_is_32bit : 0) | nir_io_has_intrinsics |
options->io_options |= (!has_16bit_io ? nir_io_mediump_is_32bit : 0) | nir_io_has_intrinsics |
(sscreen->use_ngg_culling ?
nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups : 0);
options->lower_mediump_io = has_mediump ? si_lower_mediump_io : NULL;
if (has_16bit_io) {
options->lower_mediump_io = sscreen->options.mediump ? si_lower_mediump_io_option
: si_lower_mediump_io_default;
}
/* HW supports indirect indexing for: | Enabled in driver
* -------------------------------------------------------
* TCS inputs | Yes

View file

@ -83,11 +83,22 @@ get_output_as_const_value(nir_shader *shader, float values[4])
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
if (intrin->intrinsic == nir_intrinsic_store_output) {
nir_const_value *c = nir_src_as_const_value(intrin->src[0]);
if (c) {
if (!c)
return false;
if (intrin->src[0].ssa->bit_size == 16 &&
!(nir_intrinsic_src_type(intrin) & nir_type_float))
return false;
if (intrin->src[0].ssa->bit_size == 16) {
uint16_t half_values[4];
nir_const_value_to_array(half_values, c, 4, u16);
for (unsigned i = 0; i < 4; i++)
values[i] = _mesa_half_to_float(half_values[i]);
} else {
nir_const_value_to_array(values, c, 4, f32);
return true;
}
return false;
return true;
}
FALLTHROUGH;
}
@ -128,7 +139,11 @@ replace_tex_by_imm(nir_builder *b, nir_tex_instr *tex, void *state)
return false;
b->cursor = nir_instr_remove(&tex->instr);
nir_def *imm = nir_imm_vec4(b, p->value[0], p->value[1], p->value[2], p->value[3]);
nir_def *imm;
if (tex->def.bit_size == 16)
imm = nir_imm_vec4_16(b, p->value[0], p->value[1], p->value[2], p->value[3]);
else
imm = nir_imm_vec4(b, p->value[0], p->value[1], p->value[2], p->value[3]);
nir_def_rewrite_uses(&tex->def, imm);
return true;
}

View file

@ -914,7 +914,8 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir,
struct si_shader_info *info, bool colors_lowered);
/* si_shader_nir.c */
void si_lower_mediump_io(struct nir_shader *nir);
void si_lower_mediump_io_default(nir_shader *nir);
void si_lower_mediump_io_option(struct nir_shader *nir);
bool si_alu_to_scalar_packed_math_filter(const struct nir_instr *instr, const void *data);
void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool has_array_temps);

View file

@ -237,15 +237,46 @@ static bool si_lower_intrinsics(nir_shader *nir)
NULL);
}
void si_lower_mediump_io(nir_shader *nir)
static bool can_lower_mediump_io(gl_shader_stage prev_stage, bool prev_stage_has_xfb,
gl_shader_stage next_stage, bool config_option)
{
NIR_PASS_V(nir, nir_lower_mediump_io,
/* TODO: LLVM fails to compile this test if VS inputs are 16-bit:
* dEQP-GLES31.functional.shaders.builtin_functions.integer.bitfieldinsert.uvec3_lowp_geometry
*/
(nir->info.stage != MESA_SHADER_VERTEX ? nir_var_shader_in : 0) | nir_var_shader_out,
VARYING_BIT_PNTC | BITFIELD64_RANGE(VARYING_SLOT_VAR0, 32),
true);
/* This is the filter that determines when mediump IO is lowered.
*
* NOTE: LLVM fails to compile this test if VS inputs are 16-bit:
* dEQP-GLES31.functional.shaders.builtin_functions.integer.bitfieldinsert.uvec3_lowp_geometry
*/
return (prev_stage == MESA_SHADER_VERTEX && next_stage == MESA_SHADER_FRAGMENT &&
!prev_stage_has_xfb && config_option) ||
prev_stage == MESA_SHADER_FRAGMENT;
}
/* Lower mediump shader IO to 16 bits in the directions permitted by
 * can_lower_mediump_io(), then clear the mediump flag on the shader's IO.
 *
 * nir:           shader to process
 * config_option: forwarded unchanged to can_lower_mediump_io() for both the
 *                output and the input decision
 */
static void lower_mediump_io(nir_shader *nir, bool config_option)
{
   /* Outputs: this shader is the producing stage, next_stage consumes. */
   const bool lower_outputs =
      can_lower_mediump_io(nir->info.stage, nir->xfb_info != NULL, nir->info.next_stage,
                           config_option);

   /* Inputs: prev_stage is the producing stage, this shader consumes. */
   const bool lower_inputs =
      can_lower_mediump_io(nir->info.prev_stage, nir->info.prev_stage_has_xfb, nir->info.stage,
                           config_option);

   const nir_variable_mode modes = (lower_outputs ? nir_var_shader_out : 0) |
                                   (lower_inputs ? nir_var_shader_in : 0);

   /* Skip the lowering pass entirely when neither direction qualifies.
    * The slot mask covers PNTC and the 32 slots starting at VARYING_SLOT_VAR0.
    */
   if (modes != 0) {
      NIR_PASS(_, nir, nir_lower_mediump_io, modes,
               VARYING_BIT_PNTC | BITFIELD64_RANGE(VARYING_SLOT_VAR0, 32), true);
   }

   /* Always run this, even when nothing was lowered: a leftover mediump flag
    * might prevent optimizations.
    */
   NIR_PASS(_, nir, nir_clear_mediump_io_flag);
}
/* Mediump IO lowering callback variant that runs lower_mediump_io() with
 * config_option = false.
 *
 * With config_option false, can_lower_mediump_io() only accepts IO whose
 * producing stage is MESA_SHADER_FRAGMENT, so the VS->PS case is not lowered.
 */
void si_lower_mediump_io_default(nir_shader *nir)
{
lower_mediump_io(nir, false);
}
/* Mediump IO lowering callback variant that runs lower_mediump_io() with
 * config_option = true.
 *
 * With config_option true, can_lower_mediump_io() additionally accepts
 * VS->PS IO when the vertex shader has no transform feedback, on top of the
 * fragment-shader-producer case.
 */
void si_lower_mediump_io_option(nir_shader *nir)
{
lower_mediump_io(nir, true);
}
/**