diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 82d98da042e..60734fc5c2b 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -877,7 +877,8 @@ void si_init_screen_get_functions(struct si_screen *sscreen) (sscreen->info.family >= CHIP_GFX940 && !sscreen->info.has_graphics) || /* fma32 is too slow for gpu < gfx9, so apply the option only for gpu >= gfx9 */ (sscreen->info.gfx_level >= GFX9 && sscreen->options.force_use_fma32); - bool has_mediump = sscreen->info.gfx_level >= GFX9 && sscreen->options.mediump; + /* 16-bit shader IO is only enabled on GFX9 and newer: GFX8 has precision issues with 16-bit PS outputs. */ + bool has_16bit_io = sscreen->info.gfx_level >= GFX9; nir_shader_compiler_options *options = sscreen->nir_options; ac_nir_set_options(&sscreen->info, !sscreen->use_aco, options); @@ -904,10 +905,14 @@ void si_init_screen_get_functions(struct si_screen *sscreen) * GFX8 has precision issues with this option. */ options->force_f2f16_rtz = sscreen->info.gfx_level >= GFX9; - options->io_options |= (!has_mediump ? nir_io_mediump_is_32bit : 0) | nir_io_has_intrinsics | + options->io_options |= (!has_16bit_io ? nir_io_mediump_is_32bit : 0) | nir_io_has_intrinsics | (sscreen->use_ngg_culling ? nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups : 0); - options->lower_mediump_io = has_mediump ? si_lower_mediump_io : NULL; + if (has_16bit_io) { + options->lower_mediump_io = sscreen->options.mediump ? 
si_lower_mediump_io_option + : si_lower_mediump_io_default; + } + /* HW supports indirect indexing for: | Enabled in driver * ------------------------------------------------------- * TCS inputs | Yes diff --git a/src/gallium/drivers/radeonsi/si_nir_optim.c b/src/gallium/drivers/radeonsi/si_nir_optim.c index 08f6928e563..a213911839e 100644 --- a/src/gallium/drivers/radeonsi/si_nir_optim.c +++ b/src/gallium/drivers/radeonsi/si_nir_optim.c @@ -83,11 +83,22 @@ get_output_as_const_value(nir_shader *shader, float values[4]) nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); if (intrin->intrinsic == nir_intrinsic_store_output) { nir_const_value *c = nir_src_as_const_value(intrin->src[0]); - if (c) { + if (!c) + return false; + + if (intrin->src[0].ssa->bit_size == 16 && + !(nir_intrinsic_src_type(intrin) & nir_type_float)) + return false; + + if (intrin->src[0].ssa->bit_size == 16) { + uint16_t half_values[4]; + nir_const_value_to_array(half_values, c, 4, u16); + for (unsigned i = 0; i < 4; i++) + values[i] = _mesa_half_to_float(half_values[i]); + } else { nir_const_value_to_array(values, c, 4, f32); - return true; } - return false; + return true; } FALLTHROUGH; } @@ -128,7 +139,11 @@ replace_tex_by_imm(nir_builder *b, nir_tex_instr *tex, void *state) return false; b->cursor = nir_instr_remove(&tex->instr); - nir_def *imm = nir_imm_vec4(b, p->value[0], p->value[1], p->value[2], p->value[3]); + nir_def *imm; + if (tex->def.bit_size == 16) + imm = nir_imm_vec4_16(b, p->value[0], p->value[1], p->value[2], p->value[3]); + else + imm = nir_imm_vec4(b, p->value[0], p->value[1], p->value[2], p->value[3]); nir_def_rewrite_uses(&tex->def, imm); return true; } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index d4481bd75a2..a1a85c52d6a 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -914,7 +914,8 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct 
nir_shader *nir, struct si_shader_info *info, bool colors_lowered); /* si_shader_nir.c */ -void si_lower_mediump_io(struct nir_shader *nir); +void si_lower_mediump_io_default(nir_shader *nir); +void si_lower_mediump_io_option(struct nir_shader *nir); bool si_alu_to_scalar_packed_math_filter(const struct nir_instr *instr, const void *data); void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool has_array_temps); diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 1cbd1cba786..fbc20d5c80e 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -237,15 +237,46 @@ static bool si_lower_intrinsics(nir_shader *nir) NULL); } -void si_lower_mediump_io(nir_shader *nir) +static bool can_lower_mediump_io(gl_shader_stage prev_stage, bool prev_stage_has_xfb, + gl_shader_stage next_stage, bool config_option) { - NIR_PASS_V(nir, nir_lower_mediump_io, - /* TODO: LLVM fails to compile this test if VS inputs are 16-bit: - * dEQP-GLES31.functional.shaders.builtin_functions.integer.bitfieldinsert.uvec3_lowp_geometry - */ - (nir->info.stage != MESA_SHADER_VERTEX ? nir_var_shader_in : 0) | nir_var_shader_out, - VARYING_BIT_PNTC | BITFIELD64_RANGE(VARYING_SLOT_VAR0, 32), - true); + /* This is the filter that determines when mediump IO is lowered. It returns true for a VERTEX -> FRAGMENT interface when prev_stage_has_xfb is false and config_option is true, and always when prev_stage is FRAGMENT. 
+ * + * NOTE: LLVM fails to compile this test if VS inputs are 16-bit: + * dEQP-GLES31.functional.shaders.builtin_functions.integer.bitfieldinsert.uvec3_lowp_geometry + */ + return (prev_stage == MESA_SHADER_VERTEX && next_stage == MESA_SHADER_FRAGMENT && + !prev_stage_has_xfb && config_option) || + prev_stage == MESA_SHADER_FRAGMENT; +} + +static void lower_mediump_io(nir_shader *nir, bool config_option) +{ + nir_variable_mode modes = 0; + + if (can_lower_mediump_io(nir->info.stage, nir->xfb_info != NULL, nir->info.next_stage, + config_option)) + modes |= nir_var_shader_out; /* this shader is passed as the producing (prev) stage */ + + if (can_lower_mediump_io(nir->info.prev_stage, nir->info.prev_stage_has_xfb, nir->info.stage, + config_option)) + modes |= nir_var_shader_in; /* this shader is passed as the consuming (next) stage */ + + if (modes) { + NIR_PASS(_, nir, nir_lower_mediump_io, modes, + VARYING_BIT_PNTC | BITFIELD64_RANGE(VARYING_SLOT_VAR0, 32), true); + } + NIR_PASS(_, nir, nir_clear_mediump_io_flag); +} + +void si_lower_mediump_io_default(nir_shader *nir) +{ + lower_mediump_io(nir, false); /* config_option = false */ +} + +void si_lower_mediump_io_option(nir_shader *nir) +{ + lower_mediump_io(nir, true); /* config_option = true */ } /**