nir/opt_16bit_tex_image: optimize packed conversions too

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28730>
This commit is contained in:
Georg Lehmann 2024-04-13 22:16:15 +02:00 committed by Marge Bot
parent eeed928111
commit 603982ea80
4 changed files with 106 additions and 70 deletions

View file

@ -694,8 +694,9 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
};
struct nir_opt_16bit_tex_image_options opt_16bit_options = {
.rounding_mode = nir_rounding_mode_undef,
.opt_tex_dest_types = nir_type_float,
.opt_image_dest_types = nir_type_float,
.opt_tex_dest_types = nir_type_float | nir_type_int | nir_type_uint,
.opt_image_dest_types = nir_type_float | nir_type_int | nir_type_uint,
.integer_dest_saturates = true,
.opt_image_store_data = true,
.opt_image_srcs = true,
.opt_srcs_options_count = separate_g16 ? 2 : 1,

View file

@ -6315,6 +6315,7 @@ struct nir_opt_16bit_tex_image_options {
nir_rounding_mode rounding_mode;
nir_alu_type opt_tex_dest_types;
nir_alu_type opt_image_dest_types;
bool integer_dest_saturates;
bool opt_image_store_data;
bool opt_image_srcs;
unsigned opt_srcs_options_count;

View file

@ -616,49 +616,6 @@ nir_lower_mediump_vars(nir_shader *shader, nir_variable_mode modes)
return progress;
}
/**
 * Check whether an instruction is a specific ALU conversion.
 *
 * Returns true only when \p instr is an ALU instruction whose opcode is
 * \p m and whose first source has a bit size of \p n.
 */
static bool
is_n_to_m_conversion(nir_instr *instr, unsigned n, nir_op m)
{
   if (instr->type == nir_instr_type_alu) {
      nir_alu_instr *conv = nir_instr_as_alu(instr);

      /* Check the opcode first; the source is only inspected on a match. */
      if (conv->op != m)
         return false;

      return conv->src[0].src.ssa->bit_size == n;
   }

   return false;
}
/** True when \p instr is an f2f32 ALU op whose first source is 16 bits wide. */
static bool
is_f16_to_f32_conversion(nir_instr *instr)
{
   const unsigned src_bits = 16;

   return is_n_to_m_conversion(instr, src_bits, nir_op_f2f32);
}
/**
 * True when \p instr is an f2f16 or f2fmp ALU op whose first source is
 * 32 bits wide.
 */
static bool
is_f32_to_f16_conversion(nir_instr *instr)
{
   const unsigned src_bits = 32;

   if (is_n_to_m_conversion(instr, src_bits, nir_op_f2f16))
      return true;

   return is_n_to_m_conversion(instr, src_bits, nir_op_f2fmp);
}
/** True when \p instr is an i2i32 ALU op whose first source is 16 bits wide. */
static bool
is_i16_to_i32_conversion(nir_instr *instr)
{
   const unsigned src_bits = 16;

   return is_n_to_m_conversion(instr, src_bits, nir_op_i2i32);
}
/** True when \p instr is a u2u32 ALU op whose first source is 16 bits wide. */
static bool
is_u16_to_u32_conversion(nir_instr *instr)
{
   const unsigned src_bits = 16;

   return is_n_to_m_conversion(instr, src_bits, nir_op_u2u32);
}
/**
 * True when \p instr is an i2i16, u2u16 or i2imp ALU op whose first source
 * is 32 bits wide.
 */
static bool
is_i32_to_i16_conversion(nir_instr *instr)
{
   const unsigned src_bits = 32;

   /* Opcodes are tried in the same order as before: i2i16, u2u16, i2imp. */
   if (is_n_to_m_conversion(instr, src_bits, nir_op_i2i16))
      return true;

   if (is_n_to_m_conversion(instr, src_bits, nir_op_u2u16))
      return true;

   return is_n_to_m_conversion(instr, src_bits, nir_op_i2imp);
}
/**
* Fix types of source operands of texture opcodes according to
* the constraints by inserting the appropriate conversion opcodes.
@ -788,16 +745,21 @@ can_opt_16bit_src(nir_def *ssa, nir_alu_type src_type, bool sext_matters)
can_opt &= const_is_i16(comp);
else if (opt_i16_u16)
can_opt &= (const_is_u16(comp) || const_is_i16(comp));
} else if (nir_scalar_is_alu(comp)) {
nir_alu_instr *alu = nir_instr_as_alu(comp.def->parent_instr);
if (alu->src[0].src.ssa->bit_size != 16)
return false;
if (alu->op == nir_op_f2f32)
can_opt &= opt_f16;
else if (alu->op == nir_op_i2i32)
can_opt &= opt_i16 || opt_i16_u16;
else if (alu->op == nir_op_u2u32)
can_opt &= opt_u16 || opt_i16_u16;
else
return false;
} else {
if (opt_f16)
can_opt &= is_f16_to_f32_conversion(comp.def->parent_instr);
else if (opt_u16)
can_opt &= is_u16_to_u32_conversion(comp.def->parent_instr);
else if (opt_i16)
can_opt &= is_i16_to_i32_conversion(comp.def->parent_instr);
else if (opt_i16_u16)
can_opt &= (is_i16_to_i32_conversion(comp.def->parent_instr) ||
is_u16_to_u32_conversion(comp.def->parent_instr));
return false;
}
}
@ -855,31 +817,102 @@ static bool
opt_16bit_destination(nir_def *ssa, nir_alu_type dest_type, unsigned exec_mode,
struct nir_opt_16bit_tex_image_options *options)
{
bool is_f32_to_f16 = dest_type == nir_type_float32;
bool is_i32_to_i16 = dest_type == nir_type_int32 || dest_type == nir_type_uint32;
bool opt_f2f16 = dest_type == nir_type_float32;
bool opt_i2i16 = (dest_type == nir_type_int32 || dest_type == nir_type_uint32) &&
!options->integer_dest_saturates;
bool opt_i2i16_sat = dest_type == nir_type_int32 && options->integer_dest_saturates;
bool opt_u2u16_sat = dest_type == nir_type_uint32 && options->integer_dest_saturates;
nir_rounding_mode rdm = options->rounding_mode;
nir_rounding_mode src_rdm =
nir_get_rounding_mode_from_float_controls(exec_mode, nir_type_float16);
bool allow_standard = (src_rdm == rdm || src_rdm == nir_rounding_mode_undef);
bool allow_rtz = rdm == nir_rounding_mode_rtz;
bool allow_rtne = rdm == nir_rounding_mode_rtne;
nir_foreach_use(use, ssa) {
nir_instr *instr = nir_src_parent_instr(use);
is_f32_to_f16 &= (allow_standard && is_f32_to_f16_conversion(instr)) ||
(allow_rtz && is_n_to_m_conversion(instr, 32, nir_op_f2f16_rtz)) ||
(allow_rtne && is_n_to_m_conversion(instr, 32, nir_op_f2f16_rtne));
is_i32_to_i16 &= is_i32_to_i16_conversion(instr);
}
if (instr->type != nir_instr_type_alu)
return false;
if (!is_f32_to_f16 && !is_i32_to_i16)
return false;
nir_alu_instr *alu = nir_instr_as_alu(instr);
switch (alu->op) {
case nir_op_pack_half_2x16_split:
if (alu->src[0].src.ssa != alu->src[1].src.ssa)
return false;
FALLTHROUGH;
case nir_op_pack_half_2x16:
/* pack_half rounding is undefined */
if (!opt_f2f16)
return false;
break;
case nir_op_pack_half_2x16_rtz_split:
if (alu->src[0].src.ssa != alu->src[1].src.ssa)
return false;
FALLTHROUGH;
case nir_op_f2f16_rtz:
if (rdm != nir_rounding_mode_rtz || !opt_f2f16)
return false;
break;
case nir_op_f2f16_rtne:
if (rdm != nir_rounding_mode_rtne || !opt_f2f16)
return false;
break;
case nir_op_f2f16:
case nir_op_f2fmp:
if (src_rdm != rdm && src_rdm != nir_rounding_mode_undef)
return false;
if (!opt_f2f16)
return false;
break;
case nir_op_i2i16:
case nir_op_i2imp:
case nir_op_u2u16:
if (!opt_i2i16)
return false;
break;
case nir_op_pack_sint_2x16:
if (!opt_i2i16_sat)
return false;
break;
case nir_op_pack_uint_2x16:
if (!opt_u2u16_sat)
return false;
break;
default:
return false;
}
}
/* All uses are the same conversions. Replace them with mov. */
nir_foreach_use(use, ssa) {
nir_alu_instr *conv = nir_instr_as_alu(nir_src_parent_instr(use));
conv->op = nir_op_mov;
nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(use));
switch (alu->op) {
case nir_op_f2f16_rtne:
case nir_op_f2f16_rtz:
case nir_op_f2f16:
case nir_op_f2fmp:
case nir_op_i2i16:
case nir_op_i2imp:
case nir_op_u2u16:
alu->op = nir_op_mov;
break;
case nir_op_pack_half_2x16_rtz_split:
case nir_op_pack_half_2x16_split:
alu->op = nir_op_pack_32_2x16_split;
break;
case nir_op_pack_32_2x16_split:
/* Split opcodes have two operands, so the iteration
* for the second use will already observe the
* updated opcode.
*/
break;
case nir_op_pack_half_2x16:
case nir_op_pack_sint_2x16:
case nir_op_pack_uint_2x16:
alu->op = nir_op_pack_32_2x16;
break;
default:
unreachable("unsupported conversion op");
};
}
ssa->bit_size = 16;

View file

@ -209,8 +209,9 @@ static void si_late_optimize_16bit_samplers(struct si_screen *sscreen, nir_shade
};
struct nir_opt_16bit_tex_image_options opt_16bit_options = {
.rounding_mode = nir_rounding_mode_undef,
.opt_tex_dest_types = nir_type_float,
.opt_image_dest_types = nir_type_float,
.opt_tex_dest_types = nir_type_float | nir_type_int | nir_type_uint,
.opt_image_dest_types = nir_type_float | nir_type_int | nir_type_uint,
.integer_dest_saturates = true,
.opt_image_store_data = true,
.opt_image_srcs = true,
.opt_srcs_options_count = has_g16 ? 2 : 1,