From 1636037b664322b5e7553a91983ccb664a140752 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig <alyssa@rosenzweig.io> Date: Sat, 20 May 2023 22:15:06 -0400 Subject: [PATCH] agx: Implement bitfieldExtract natively We have a bfeil instruction which mostly maps to the GLSL thing, so use it with the appropriate lowering. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: --- src/asahi/compiler/agx_compile.c | 11 ++++++++++ src/asahi/compiler/agx_compile.h | 1 - src/asahi/compiler/agx_nir_algebraic.py | 27 +++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 5ccb9c79771..6915e149967 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -1137,6 +1137,17 @@ agx_emit_alu(agx_builder *b, nir_alu_instr *instr) return agx_extr_to(b, dst, s0, s1, s2, nir_alu_src_as_uint(instr->src[3])); + case nir_op_ubitfield_extract: { + unsigned m = nir_alu_src_as_uint(instr->src[2]); + assert(m != 0 && "should've been optimized"); + + /* Disable masking if the whole thing is used */ + if (m >= 32) + m = 0; + + return agx_bfeil_to(b, dst, i0, s0, s1, m); + } + case nir_op_bcsel: return agx_icmpsel_to(b, dst, s0, i0, s2, s1, AGX_ICOND_UEQ); diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h index 2f82ceeff67..c959ad55edf 100644 --- a/src/asahi/compiler/agx_compile.h +++ b/src/asahi/compiler/agx_compile.h @@ -188,7 +188,6 @@ static const nir_shader_compiler_options agx_nir_options = { .lower_flrp32 = true, .lower_fpow = true, .lower_fmod = true, - .lower_bitfield_extract_to_shifts = true, .lower_bitfield_insert_to_shifts = true, .lower_ifind_msb = true, .lower_find_lsb = true, diff --git a/src/asahi/compiler/agx_nir_algebraic.py b/src/asahi/compiler/agx_nir_algebraic.py index 868e2866df3..9017589ad6d 100644 --- a/src/asahi/compiler/agx_nir_algebraic.py +++ b/src/asahi/compiler/agx_nir_algebraic.py @@ -38,6 +38,33 @@ lower_pack = [ # For optimizing
extract->convert sequences for unpack/pack norm (('u2f32', ('u2u32', a)), ('u2f32', a)), (('i2f32', ('i2i32', a)), ('i2f32', a)), + + # These are based on the lowerings from nir_opt_algebraic, but conditioned + # on the number of bits not being constant. If the bit count is constant + # (the happy path) we can use our native instruction instead. + (('ibitfield_extract', 'value', 'offset', 'bits(is_not_const)'), + ('bcsel', ('ieq', 0, 'bits'), + 0, + ('ishr', + ('ishl', 'value', ('isub', ('isub', 32, 'bits'), 'offset')), + ('isub', 32, 'bits')))), + + (('ubitfield_extract', 'value', 'offset', 'bits(is_not_const)'), + ('iand', + ('ushr', 'value', 'offset'), + ('bcsel', ('ieq', 'bits', 32), + 0xffffffff, + ('isub', ('ishl', 1, 'bits'), 1)))), + + # Codegen depends on this trivial case being optimized out. + (('ubitfield_extract', 'value', 'offset', 0), 0), + (('ibitfield_extract', 'value', 'offset', 0), 0), + + # At this point, bitfield extracts are constant. We can only do constant + # unsigned bitfield extract, so lower signed to unsigned + sign extend. + (('ibitfield_extract', a, b, '#bits'), + ('ishr', ('ishl', ('ubitfield_extract', a, b, 'bits'), ('isub', 32, 'bits')), + ('isub', 32, 'bits'))), ] # (x * y) + s = (x * y) + (s << 0)