From ee74b090dba18d771ae1b1d16d2c4a5cf36ae6c8 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Fri, 8 Nov 2024 15:07:54 +0100
Subject: [PATCH] nir/opt_16bit_tex_image: optimize extract half sources

I also tried extract_i16/u16, but that causes a lot of regressions.

Foz-DB Navi21:
Totals from 3 (0.00% of 79395) affected shaders:
Instrs: 367 -> 355 (-3.27%)
CodeSize: 2156 -> 2136 (-0.93%)
VGPRs: 80 -> 72 (-10.00%)
Latency: 3163 -> 3153 (-0.32%); split: -0.51%, +0.19%
InvThroughput: 424 -> 404 (-4.72%)
Copies: 31 -> 42 (+35.48%); split: -3.23%, +38.71%
PreVGPRs: 27 -> 25 (-7.41%)
VALU: 208 -> 196 (-5.77%)

Reviewed-by: Alyssa Rosenzweig
Part-of:
---
 src/compiler/nir/nir_lower_mediump.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/src/compiler/nir/nir_lower_mediump.c b/src/compiler/nir/nir_lower_mediump.c
index 1c8df13dd0d..c0289dd0f91 100644
--- a/src/compiler/nir/nir_lower_mediump.c
+++ b/src/compiler/nir/nir_lower_mediump.c
@@ -742,14 +742,15 @@ can_opt_16bit_src(nir_def *ssa, nir_alu_type src_type, bool sext_matters)
             can_opt &= (const_is_u16(comp) || const_is_i16(comp));
       } else if (nir_scalar_is_alu(comp)) {
          nir_alu_instr *alu = nir_instr_as_alu(comp.def->parent_instr);
-         if (alu->src[0].src.ssa->bit_size != 16)
-            return false;
+         bool is_16bit = alu->src[0].src.ssa->bit_size == 16;
 
-         if (alu->op == nir_op_f2f32)
+         if ((alu->op == nir_op_f2f32 && is_16bit) ||
+             alu->op == nir_op_unpack_half_2x16_split_x ||
+             alu->op == nir_op_unpack_half_2x16_split_y)
             can_opt &= opt_f16;
-         else if (alu->op == nir_op_i2i32)
+         else if (alu->op == nir_op_i2i32 && is_16bit)
             can_opt &= opt_i16 || opt_i16_u16;
-         else if (alu->op == nir_op_u2u32)
+         else if (alu->op == nir_op_u2u32 && is_16bit)
             can_opt &= opt_u16 || opt_i16_u16;
          else
             return false;
@@ -782,6 +783,23 @@ opt_16bit_src(nir_builder *b, nir_instr *instr, nir_src *src, nir_alu_type src_t
       } else {
          /* conversion instruction */
          new_comps[i] = nir_scalar_chase_alu_src(comp, 0);
+         if (new_comps[i].def->bit_size != 16) {
+            assert(new_comps[i].def->bit_size == 32);
+
+            nir_def *extract = nir_channel(b, new_comps[i].def, new_comps[i].comp);
+            switch (nir_scalar_alu_op(comp)) {
+            case nir_op_unpack_half_2x16_split_x:
+               extract = nir_unpack_32_2x16_split_x(b, extract);
+               break;
+            case nir_op_unpack_half_2x16_split_y:
+               extract = nir_unpack_32_2x16_split_y(b, extract);
+               break;
+            default:
+               unreachable("unsupported alu op");
+            }
+
+            new_comps[i] = nir_get_scalar(extract, 0);
+         }
       }
    }
 