diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 951508026a2..a50805207f9 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -694,8 +694,9 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat }; struct nir_opt_16bit_tex_image_options opt_16bit_options = { .rounding_mode = nir_rounding_mode_undef, - .opt_tex_dest_types = nir_type_float, - .opt_image_dest_types = nir_type_float, + .opt_tex_dest_types = nir_type_float | nir_type_int | nir_type_uint, + .opt_image_dest_types = nir_type_float | nir_type_int | nir_type_uint, + .integer_dest_saturates = true, .opt_image_store_data = true, .opt_image_srcs = true, .opt_srcs_options_count = separate_g16 ? 2 : 1, diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index c0a904e76cb..f25f67428bd 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -6315,6 +6315,7 @@ struct nir_opt_16bit_tex_image_options { nir_rounding_mode rounding_mode; nir_alu_type opt_tex_dest_types; nir_alu_type opt_image_dest_types; + bool integer_dest_saturates; bool opt_image_store_data; bool opt_image_srcs; unsigned opt_srcs_options_count; diff --git a/src/compiler/nir/nir_lower_mediump.c b/src/compiler/nir/nir_lower_mediump.c index 1b869ff5214..498ff7e8a0e 100644 --- a/src/compiler/nir/nir_lower_mediump.c +++ b/src/compiler/nir/nir_lower_mediump.c @@ -616,49 +616,6 @@ nir_lower_mediump_vars(nir_shader *shader, nir_variable_mode modes) return progress; } -static bool -is_n_to_m_conversion(nir_instr *instr, unsigned n, nir_op m) -{ - if (instr->type != nir_instr_type_alu) - return false; - - nir_alu_instr *alu = nir_instr_as_alu(instr); - return alu->op == m && alu->src[0].src.ssa->bit_size == n; -} - -static bool -is_f16_to_f32_conversion(nir_instr *instr) -{ - return is_n_to_m_conversion(instr, 16, nir_op_f2f32); -} - -static bool -is_f32_to_f16_conversion(nir_instr *instr) -{ - return is_n_to_m_conversion(instr, 32, nir_op_f2f16) || - is_n_to_m_conversion(instr, 32, nir_op_f2fmp); -} - -static bool -is_i16_to_i32_conversion(nir_instr *instr) -{ - return is_n_to_m_conversion(instr, 16, nir_op_i2i32); -} - -static bool -is_u16_to_u32_conversion(nir_instr *instr) -{ - return is_n_to_m_conversion(instr, 16, nir_op_u2u32); -} - -static bool -is_i32_to_i16_conversion(nir_instr *instr) -{ - return is_n_to_m_conversion(instr, 32, nir_op_i2i16) || - is_n_to_m_conversion(instr, 32, nir_op_u2u16) || - is_n_to_m_conversion(instr, 32, nir_op_i2imp); -} - /** * Fix types of source operands of texture opcodes according to * the constraints by inserting the appropriate conversion opcodes. @@ -788,16 +745,21 @@ can_opt_16bit_src(nir_def *ssa, nir_alu_type src_type, bool sext_matters) can_opt &= const_is_i16(comp); else if (opt_i16_u16) can_opt &= (const_is_u16(comp) || const_is_i16(comp)); + } else if (nir_scalar_is_alu(comp)) { + nir_alu_instr *alu = nir_instr_as_alu(comp.def->parent_instr); + if (alu->src[0].src.ssa->bit_size != 16) + return false; + + if (alu->op == nir_op_f2f32) + can_opt &= opt_f16; + else if (alu->op == nir_op_i2i32) + can_opt &= opt_i16 || opt_i16_u16; + else if (alu->op == nir_op_u2u32) + can_opt &= opt_u16 || opt_i16_u16; + else + return false; } else { - if (opt_f16) - can_opt &= is_f16_to_f32_conversion(comp.def->parent_instr); - else if (opt_u16) - can_opt &= is_u16_to_u32_conversion(comp.def->parent_instr); - else if (opt_i16) - can_opt &= is_i16_to_i32_conversion(comp.def->parent_instr); - else if (opt_i16_u16) - can_opt &= (is_i16_to_i32_conversion(comp.def->parent_instr) || - is_u16_to_u32_conversion(comp.def->parent_instr)); + return false; } } @@ -855,31 +817,102 @@ static bool opt_16bit_destination(nir_def *ssa, nir_alu_type dest_type, unsigned exec_mode, struct nir_opt_16bit_tex_image_options *options) { - bool is_f32_to_f16 = dest_type == nir_type_float32; - bool is_i32_to_i16 = dest_type == nir_type_int32 || dest_type == nir_type_uint32; + bool opt_f2f16 = dest_type == nir_type_float32; + bool opt_i2i16 = (dest_type == nir_type_int32 || dest_type == nir_type_uint32) && + !options->integer_dest_saturates; + bool opt_i2i16_sat = dest_type == nir_type_int32 && options->integer_dest_saturates; + bool opt_u2u16_sat = dest_type == nir_type_uint32 && options->integer_dest_saturates; nir_rounding_mode rdm = options->rounding_mode; nir_rounding_mode src_rdm = nir_get_rounding_mode_from_float_controls(exec_mode, nir_type_float16); - bool allow_standard = (src_rdm == rdm || src_rdm == nir_rounding_mode_undef); - bool allow_rtz = rdm == nir_rounding_mode_rtz; - bool allow_rtne = rdm == nir_rounding_mode_rtne; nir_foreach_use(use, ssa) { nir_instr *instr = nir_src_parent_instr(use); - is_f32_to_f16 &= (allow_standard && is_f32_to_f16_conversion(instr)) || - (allow_rtz && is_n_to_m_conversion(instr, 32, nir_op_f2f16_rtz)) || - (allow_rtne && is_n_to_m_conversion(instr, 32, nir_op_f2f16_rtne)); - is_i32_to_i16 &= is_i32_to_i16_conversion(instr); - } + if (instr->type != nir_instr_type_alu) + return false; - if (!is_f32_to_f16 && !is_i32_to_i16) - return false; + nir_alu_instr *alu = nir_instr_as_alu(instr); + + switch (alu->op) { + case nir_op_pack_half_2x16_split: + if (alu->src[0].src.ssa != alu->src[1].src.ssa) + return false; + FALLTHROUGH; + case nir_op_pack_half_2x16: + /* pack_half rounding is undefined */ + if (!opt_f2f16) + return false; + break; + case nir_op_pack_half_2x16_rtz_split: + if (alu->src[0].src.ssa != alu->src[1].src.ssa) + return false; + FALLTHROUGH; + case nir_op_f2f16_rtz: + if (rdm != nir_rounding_mode_rtz || !opt_f2f16) + return false; + break; + case nir_op_f2f16_rtne: + if (rdm != nir_rounding_mode_rtne || !opt_f2f16) + return false; + break; + case nir_op_f2f16: + case nir_op_f2fmp: + if (src_rdm != rdm && src_rdm != nir_rounding_mode_undef) + return false; + if (!opt_f2f16) + return false; + break; + case nir_op_i2i16: + case nir_op_i2imp: + case nir_op_u2u16: + if (!opt_i2i16) + return false; + break; + case nir_op_pack_sint_2x16: + if (!opt_i2i16_sat) + return false; + break; + case nir_op_pack_uint_2x16: + if (!opt_u2u16_sat) + return false; + break; + default: + return false; + } + } /* All uses are the same conversions. Replace them with mov. */ nir_foreach_use(use, ssa) { - nir_alu_instr *conv = nir_instr_as_alu(nir_src_parent_instr(use)); - conv->op = nir_op_mov; + nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(use)); + switch (alu->op) { + case nir_op_f2f16_rtne: + case nir_op_f2f16_rtz: + case nir_op_f2f16: + case nir_op_f2fmp: + case nir_op_i2i16: + case nir_op_i2imp: + case nir_op_u2u16: + alu->op = nir_op_mov; + break; + case nir_op_pack_half_2x16_rtz_split: + case nir_op_pack_half_2x16_split: + alu->op = nir_op_pack_32_2x16_split; + break; + case nir_op_pack_32_2x16_split: + /* Split opcodes have two operands, so the iteration + * for the second use will already observe the + * updated opcode. + */ + break; + case nir_op_pack_half_2x16: + case nir_op_pack_sint_2x16: + case nir_op_pack_uint_2x16: + alu->op = nir_op_pack_32_2x16; + break; + default: + unreachable("unsupported conversion op"); + }; } ssa->bit_size = 16; diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index c86ea98f059..81cf051dedc 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -209,8 +209,9 @@ static void si_late_optimize_16bit_samplers(struct si_screen *sscreen, nir_shade }; struct nir_opt_16bit_tex_image_options opt_16bit_options = { .rounding_mode = nir_rounding_mode_undef, - .opt_tex_dest_types = nir_type_float, - .opt_image_dest_types = nir_type_float, + .opt_tex_dest_types = nir_type_float | nir_type_int | nir_type_uint, + .opt_image_dest_types = nir_type_float | nir_type_int | nir_type_uint, + .integer_dest_saturates = true, .opt_image_store_data = true, .opt_image_srcs = true, .opt_srcs_options_count = has_g16 ? 2 : 1,