From 1636037b664322b5e7553a91983ccb664a140752 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig <alyssa@rosenzweig.io> Date: Sat, 20 May 2023 22:15:06 -0400 Subject: [PATCH] agx: Implement bitfieldExtract natively We have a bfeil instruction which mostly maps to the GLSL thing, so use it with the appropriate lowering. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: --- src/asahi/compiler/agx_compile.c | 11 ++++++++++ src/asahi/compiler/agx_compile.h | 1 - src/asahi/compiler/agx_nir_algebraic.py | 27 +++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 5ccb9c79771..6915e149967 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -1137,6 +1137,17 @@ agx_emit_alu(agx_builder *b, nir_alu_instr *instr) return agx_extr_to(b, dst, s0, s1, s2, nir_alu_src_as_uint(instr->src[3])); + case nir_op_ubitfield_extract: { + unsigned m = nir_alu_src_as_uint(instr->src[2]); + assert(m != 0 && "should've been optimized"); + + /* Disable masking if the whole thing is used */ + if (m >= 32) + m = 0; + + return agx_bfeil_to(b, dst, i0, s0, s1, m); + } + case nir_op_bcsel: return agx_icmpsel_to(b, dst, s0, i0, s2, s1, AGX_ICOND_UEQ); diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h index 2f82ceeff67..c959ad55edf 100644 --- a/src/asahi/compiler/agx_compile.h +++ b/src/asahi/compiler/agx_compile.h @@ -188,7 +188,6 @@ static const nir_shader_compiler_options agx_nir_options = { .lower_flrp32 = true, .lower_fpow = true, .lower_fmod = true, - .lower_bitfield_extract_to_shifts = true, .lower_bitfield_insert_to_shifts = true, .lower_ifind_msb = true, .lower_find_lsb = true, diff --git a/src/asahi/compiler/agx_nir_algebraic.py b/src/asahi/compiler/agx_nir_algebraic.py index 868e2866df3..9017589ad6d 100644 --- a/src/asahi/compiler/agx_nir_algebraic.py +++ b/src/asahi/compiler/agx_nir_algebraic.py @@ -38,6 +38,33 @@ lower_pack = [ # For optimizing
extract->convert sequences for unpack/pack norm (('u2f32', ('u2u32', a)), ('u2f32', a)), (('i2f32', ('i2i32', a)), ('i2f32', a)), + + # These are based on the lowerings from nir_opt_algebraic, but conditioned + # on the number of bits not being constant. If the bit count is constant + # (the happy path) we can use our native instruction instead. + (('ibitfield_extract', 'value', 'offset', 'bits(is_not_const)'), + ('bcsel', ('ieq', 0, 'bits'), + 0, + ('ishr', + ('ishl', 'value', ('isub', ('isub', 32, 'bits'), 'offset')), + ('isub', 32, 'bits')))), + + (('ubitfield_extract', 'value', 'offset', 'bits(is_not_const)'), + ('iand', + ('ushr', 'value', 'offset'), + ('bcsel', ('ieq', 'bits', 32), + 0xffffffff, + ('isub', ('ishl', 1, 'bits'), 1)))), + + # Codegen depends on this trivial case being optimized out. + (('ubitfield_extract', 'value', 'offset', 0), 0), + (('ibitfield_extract', 'value', 'offset', 0), 0), + + # At this point, bitfield extracts are constant. We can only do constant + # unsigned bitfield extract, so lower signed to unsigned + sign extend. + (('ibitfield_extract', a, b, '#bits'), + ('ishr', ('ishl', ('ubitfield_extract', a, b, 'bits'), ('isub', 32, 'bits')), + ('isub', 32, 'bits'))), ] # (x * y) + s = (x * y) + (s << 0)