nir/opt_16bit_tex_image: optimize extract half sources

I also tried extract_i16/u16, but that causes a lot of regressions.

Foz-DB Navi21:
Totals from 3 (0.00% of 79395) affected shaders:
Instrs: 367 -> 355 (-3.27%)
CodeSize: 2156 -> 2136 (-0.93%)
VGPRs: 80 -> 72 (-10.00%)
Latency: 3163 -> 3153 (-0.32%); split: -0.51%, +0.19%
InvThroughput: 424 -> 404 (-4.72%)
Copies: 31 -> 42 (+35.48%); split: -3.23%, +38.71%
PreVGPRs: 27 -> 25 (-7.41%)
VALU: 208 -> 196 (-5.77%)

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32058>
This commit is contained in:
Georg Lehmann 2024-11-08 15:07:54 +01:00 committed by Marge Bot
parent bad38c1e76
commit ee74b090db

View file

@ -742,14 +742,15 @@ can_opt_16bit_src(nir_def *ssa, nir_alu_type src_type, bool sext_matters)
can_opt &= (const_is_u16(comp) || const_is_i16(comp));
} else if (nir_scalar_is_alu(comp)) {
nir_alu_instr *alu = nir_instr_as_alu(comp.def->parent_instr);
if (alu->src[0].src.ssa->bit_size != 16)
return false;
bool is_16bit = alu->src[0].src.ssa->bit_size == 16;
if (alu->op == nir_op_f2f32)
if ((alu->op == nir_op_f2f32 && is_16bit) ||
alu->op == nir_op_unpack_half_2x16_split_x ||
alu->op == nir_op_unpack_half_2x16_split_y)
can_opt &= opt_f16;
else if (alu->op == nir_op_i2i32)
else if (alu->op == nir_op_i2i32 && is_16bit)
can_opt &= opt_i16 || opt_i16_u16;
else if (alu->op == nir_op_u2u32)
else if (alu->op == nir_op_u2u32 && is_16bit)
can_opt &= opt_u16 || opt_i16_u16;
else
return false;
@ -782,6 +783,23 @@ opt_16bit_src(nir_builder *b, nir_instr *instr, nir_src *src, nir_alu_type src_t
} else {
/* conversion instruction */
new_comps[i] = nir_scalar_chase_alu_src(comp, 0);
if (new_comps[i].def->bit_size != 16) {
assert(new_comps[i].def->bit_size == 32);
nir_def *extract = nir_channel(b, new_comps[i].def, new_comps[i].comp);
switch (nir_scalar_alu_op(comp)) {
case nir_op_unpack_half_2x16_split_x:
extract = nir_unpack_32_2x16_split_x(b, extract);
break;
case nir_op_unpack_half_2x16_split_y:
extract = nir_unpack_32_2x16_split_y(b, extract);
break;
default:
unreachable("unsupported alu op");
}
new_comps[i] = nir_get_scalar(extract, 0);
}
}
}