mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 17:30:12 +01:00
nir/opt_16bit_tex_image: optimize packed conversions too
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28730>
This commit is contained in:
parent
eeed928111
commit
603982ea80
4 changed files with 106 additions and 70 deletions
|
|
@ -694,8 +694,9 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
};
|
||||
struct nir_opt_16bit_tex_image_options opt_16bit_options = {
|
||||
.rounding_mode = nir_rounding_mode_undef,
|
||||
.opt_tex_dest_types = nir_type_float,
|
||||
.opt_image_dest_types = nir_type_float,
|
||||
.opt_tex_dest_types = nir_type_float | nir_type_int | nir_type_uint,
|
||||
.opt_image_dest_types = nir_type_float | nir_type_int | nir_type_uint,
|
||||
.integer_dest_saturates = true,
|
||||
.opt_image_store_data = true,
|
||||
.opt_image_srcs = true,
|
||||
.opt_srcs_options_count = separate_g16 ? 2 : 1,
|
||||
|
|
|
|||
|
|
@ -6315,6 +6315,7 @@ struct nir_opt_16bit_tex_image_options {
|
|||
nir_rounding_mode rounding_mode;
|
||||
nir_alu_type opt_tex_dest_types;
|
||||
nir_alu_type opt_image_dest_types;
|
||||
bool integer_dest_saturates;
|
||||
bool opt_image_store_data;
|
||||
bool opt_image_srcs;
|
||||
unsigned opt_srcs_options_count;
|
||||
|
|
|
|||
|
|
@ -616,49 +616,6 @@ nir_lower_mediump_vars(nir_shader *shader, nir_variable_mode modes)
|
|||
return progress;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_n_to_m_conversion(nir_instr *instr, unsigned n, nir_op m)
|
||||
{
|
||||
if (instr->type != nir_instr_type_alu)
|
||||
return false;
|
||||
|
||||
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
||||
return alu->op == m && alu->src[0].src.ssa->bit_size == n;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_f16_to_f32_conversion(nir_instr *instr)
|
||||
{
|
||||
return is_n_to_m_conversion(instr, 16, nir_op_f2f32);
|
||||
}
|
||||
|
||||
static bool
|
||||
is_f32_to_f16_conversion(nir_instr *instr)
|
||||
{
|
||||
return is_n_to_m_conversion(instr, 32, nir_op_f2f16) ||
|
||||
is_n_to_m_conversion(instr, 32, nir_op_f2fmp);
|
||||
}
|
||||
|
||||
static bool
|
||||
is_i16_to_i32_conversion(nir_instr *instr)
|
||||
{
|
||||
return is_n_to_m_conversion(instr, 16, nir_op_i2i32);
|
||||
}
|
||||
|
||||
static bool
|
||||
is_u16_to_u32_conversion(nir_instr *instr)
|
||||
{
|
||||
return is_n_to_m_conversion(instr, 16, nir_op_u2u32);
|
||||
}
|
||||
|
||||
static bool
|
||||
is_i32_to_i16_conversion(nir_instr *instr)
|
||||
{
|
||||
return is_n_to_m_conversion(instr, 32, nir_op_i2i16) ||
|
||||
is_n_to_m_conversion(instr, 32, nir_op_u2u16) ||
|
||||
is_n_to_m_conversion(instr, 32, nir_op_i2imp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fix types of source operands of texture opcodes according to
|
||||
* the constraints by inserting the appropriate conversion opcodes.
|
||||
|
|
@ -788,16 +745,21 @@ can_opt_16bit_src(nir_def *ssa, nir_alu_type src_type, bool sext_matters)
|
|||
can_opt &= const_is_i16(comp);
|
||||
else if (opt_i16_u16)
|
||||
can_opt &= (const_is_u16(comp) || const_is_i16(comp));
|
||||
} else if (nir_scalar_is_alu(comp)) {
|
||||
nir_alu_instr *alu = nir_instr_as_alu(comp.def->parent_instr);
|
||||
if (alu->src[0].src.ssa->bit_size != 16)
|
||||
return false;
|
||||
|
||||
if (alu->op == nir_op_f2f32)
|
||||
can_opt &= opt_f16;
|
||||
else if (alu->op == nir_op_i2i32)
|
||||
can_opt &= opt_i16 || opt_i16_u16;
|
||||
else if (alu->op == nir_op_u2u32)
|
||||
can_opt &= opt_u16 || opt_i16_u16;
|
||||
else
|
||||
return false;
|
||||
} else {
|
||||
if (opt_f16)
|
||||
can_opt &= is_f16_to_f32_conversion(comp.def->parent_instr);
|
||||
else if (opt_u16)
|
||||
can_opt &= is_u16_to_u32_conversion(comp.def->parent_instr);
|
||||
else if (opt_i16)
|
||||
can_opt &= is_i16_to_i32_conversion(comp.def->parent_instr);
|
||||
else if (opt_i16_u16)
|
||||
can_opt &= (is_i16_to_i32_conversion(comp.def->parent_instr) ||
|
||||
is_u16_to_u32_conversion(comp.def->parent_instr));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -855,31 +817,102 @@ static bool
|
|||
opt_16bit_destination(nir_def *ssa, nir_alu_type dest_type, unsigned exec_mode,
|
||||
struct nir_opt_16bit_tex_image_options *options)
|
||||
{
|
||||
bool is_f32_to_f16 = dest_type == nir_type_float32;
|
||||
bool is_i32_to_i16 = dest_type == nir_type_int32 || dest_type == nir_type_uint32;
|
||||
bool opt_f2f16 = dest_type == nir_type_float32;
|
||||
bool opt_i2i16 = (dest_type == nir_type_int32 || dest_type == nir_type_uint32) &&
|
||||
!options->integer_dest_saturates;
|
||||
bool opt_i2i16_sat = dest_type == nir_type_int32 && options->integer_dest_saturates;
|
||||
bool opt_u2u16_sat = dest_type == nir_type_uint32 && options->integer_dest_saturates;
|
||||
|
||||
nir_rounding_mode rdm = options->rounding_mode;
|
||||
nir_rounding_mode src_rdm =
|
||||
nir_get_rounding_mode_from_float_controls(exec_mode, nir_type_float16);
|
||||
bool allow_standard = (src_rdm == rdm || src_rdm == nir_rounding_mode_undef);
|
||||
bool allow_rtz = rdm == nir_rounding_mode_rtz;
|
||||
bool allow_rtne = rdm == nir_rounding_mode_rtne;
|
||||
|
||||
nir_foreach_use(use, ssa) {
|
||||
nir_instr *instr = nir_src_parent_instr(use);
|
||||
is_f32_to_f16 &= (allow_standard && is_f32_to_f16_conversion(instr)) ||
|
||||
(allow_rtz && is_n_to_m_conversion(instr, 32, nir_op_f2f16_rtz)) ||
|
||||
(allow_rtne && is_n_to_m_conversion(instr, 32, nir_op_f2f16_rtne));
|
||||
is_i32_to_i16 &= is_i32_to_i16_conversion(instr);
|
||||
}
|
||||
if (instr->type != nir_instr_type_alu)
|
||||
return false;
|
||||
|
||||
if (!is_f32_to_f16 && !is_i32_to_i16)
|
||||
return false;
|
||||
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
||||
|
||||
switch (alu->op) {
|
||||
case nir_op_pack_half_2x16_split:
|
||||
if (alu->src[0].src.ssa != alu->src[1].src.ssa)
|
||||
return false;
|
||||
FALLTHROUGH;
|
||||
case nir_op_pack_half_2x16:
|
||||
/* pack_half rounding is undefined */
|
||||
if (!opt_f2f16)
|
||||
return false;
|
||||
break;
|
||||
case nir_op_pack_half_2x16_rtz_split:
|
||||
if (alu->src[0].src.ssa != alu->src[1].src.ssa)
|
||||
return false;
|
||||
FALLTHROUGH;
|
||||
case nir_op_f2f16_rtz:
|
||||
if (rdm != nir_rounding_mode_rtz || !opt_f2f16)
|
||||
return false;
|
||||
break;
|
||||
case nir_op_f2f16_rtne:
|
||||
if (rdm != nir_rounding_mode_rtne || !opt_f2f16)
|
||||
return false;
|
||||
break;
|
||||
case nir_op_f2f16:
|
||||
case nir_op_f2fmp:
|
||||
if (src_rdm != rdm && src_rdm != nir_rounding_mode_undef)
|
||||
return false;
|
||||
if (!opt_f2f16)
|
||||
return false;
|
||||
break;
|
||||
case nir_op_i2i16:
|
||||
case nir_op_i2imp:
|
||||
case nir_op_u2u16:
|
||||
if (!opt_i2i16)
|
||||
return false;
|
||||
break;
|
||||
case nir_op_pack_sint_2x16:
|
||||
if (!opt_i2i16_sat)
|
||||
return false;
|
||||
break;
|
||||
case nir_op_pack_uint_2x16:
|
||||
if (!opt_u2u16_sat)
|
||||
return false;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* All uses are the same conversions. Replace them with mov. */
|
||||
nir_foreach_use(use, ssa) {
|
||||
nir_alu_instr *conv = nir_instr_as_alu(nir_src_parent_instr(use));
|
||||
conv->op = nir_op_mov;
|
||||
nir_alu_instr *alu = nir_instr_as_alu(nir_src_parent_instr(use));
|
||||
switch (alu->op) {
|
||||
case nir_op_f2f16_rtne:
|
||||
case nir_op_f2f16_rtz:
|
||||
case nir_op_f2f16:
|
||||
case nir_op_f2fmp:
|
||||
case nir_op_i2i16:
|
||||
case nir_op_i2imp:
|
||||
case nir_op_u2u16:
|
||||
alu->op = nir_op_mov;
|
||||
break;
|
||||
case nir_op_pack_half_2x16_rtz_split:
|
||||
case nir_op_pack_half_2x16_split:
|
||||
alu->op = nir_op_pack_32_2x16_split;
|
||||
break;
|
||||
case nir_op_pack_32_2x16_split:
|
||||
/* Split opcodes have two operands, so the iteration
|
||||
* for the second use will already observe the
|
||||
* updated opcode.
|
||||
*/
|
||||
break;
|
||||
case nir_op_pack_half_2x16:
|
||||
case nir_op_pack_sint_2x16:
|
||||
case nir_op_pack_uint_2x16:
|
||||
alu->op = nir_op_pack_32_2x16;
|
||||
break;
|
||||
default:
|
||||
unreachable("unsupported conversion op");
|
||||
};
|
||||
}
|
||||
|
||||
ssa->bit_size = 16;
|
||||
|
|
|
|||
|
|
@ -209,8 +209,9 @@ static void si_late_optimize_16bit_samplers(struct si_screen *sscreen, nir_shade
|
|||
};
|
||||
struct nir_opt_16bit_tex_image_options opt_16bit_options = {
|
||||
.rounding_mode = nir_rounding_mode_undef,
|
||||
.opt_tex_dest_types = nir_type_float,
|
||||
.opt_image_dest_types = nir_type_float,
|
||||
.opt_tex_dest_types = nir_type_float | nir_type_int | nir_type_uint,
|
||||
.opt_image_dest_types = nir_type_float | nir_type_int | nir_type_uint,
|
||||
.integer_dest_saturates = true,
|
||||
.opt_image_store_data = true,
|
||||
.opt_image_srcs = true,
|
||||
.opt_srcs_options_count = has_g16 ? 2 : 1,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue