pan/nir: Lower texturing ops in NIR on Bifrost

Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Reviewed-by: Lorenzo Rossi <lorenzo.rossi@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41036>
This commit is contained in:
Faith Ekstrand 2026-04-16 17:16:33 -04:00 committed by Marge Bot
parent 05a066c921
commit 6c9ffd782b
2 changed files with 3 additions and 678 deletions

View file

@ -3658,35 +3658,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
}
}
/* Translates a GLSL sampler dimensionality into the coordinate-count field of
 * the Bifrost texture descriptor. Cube maps are special-cased to 0; buffers
 * are treated as 1D. Shamelessly copied from Midgard */
static unsigned
bifrost_tex_format(enum glsl_sampler_dim dim)
{
   switch (dim) {
   case GLSL_SAMPLER_DIM_CUBE:
      return 0;

   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_BUF:
      return 1;

   case GLSL_SAMPLER_DIM_3D:
      return 3;

   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_MS:
   case GLSL_SAMPLER_DIM_EXTERNAL:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_SUBPASS:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      return 2;

   default:
      UNREACHABLE("Unknown sampler dim type\n");
   }
}
static enum bi_dimension
valhall_tex_dimension(enum glsl_sampler_dim dim)
{
@ -3714,555 +3685,6 @@ valhall_tex_dimension(enum glsl_sampler_dim dim)
}
}
/* Maps a NIR destination type to the hardware texture result format. Only the
 * float formats encode a conversion clamp; the integer formats ignore it. */
static enum bifrost_texture_format_full
bi_texture_format(nir_alu_type T, enum bi_clamp clamp)
{
   switch (T) {
   case nir_type_uint16:
      return BIFROST_TEXTURE_FORMAT_U16;
   case nir_type_int16:
      return BIFROST_TEXTURE_FORMAT_S16;
   case nir_type_uint32:
      return BIFROST_TEXTURE_FORMAT_U32;
   case nir_type_int32:
      return BIFROST_TEXTURE_FORMAT_S32;
   case nir_type_float16:
      return BIFROST_TEXTURE_FORMAT_F16 + clamp;
   case nir_type_float32:
      return BIFROST_TEXTURE_FORMAT_F32 + clamp;
   default:
      UNREACHABLE("Invalid type for texturing");
   }
}
/* The hardware wants the array layer as a 32-bit unsigned integer. Integer
 * NIR sources pass straight through; float sources (from NIR's .z component)
 * are converted here. */
static bi_index
bi_emit_texc_array_index(bi_builder *b, bi_index idx, nir_alu_type T)
{
   nir_alu_type base = nir_alu_type_get_base_type(T);

   /* (u)int layer indices need no conversion */
   if (base == nir_type_uint || base == nir_type_int)
      return idx;

   assert(T == nir_type_float32);

   /* OpenGL ES 3.2 §8.14.2 ("Coordinate Wrapping and Texel Selection")
    * defines the layer as clamp(RNE(r), 0, dt - 1), so convert with
    * round-to-nearest-even; the clamp itself is applied at the data
    * structure level */
   bi_instr *cvt = bi_f32_to_u32_to(b, bi_temp(b->shader), idx);
   cvt->round = BI_ROUND_NONE;
   return cvt->dest[0];
}
/* TEXC's explicit and bias LOD modes requires the LOD to be transformed to a
 * 16-bit 8:8 fixed-point format. We lower as:
 *
 * F32_TO_S32(clamp(x, -16.0, +16.0) * 256.0) & 0xFFFF =
 * MKVEC(F32_TO_S32(clamp(x * 1.0/16.0, -1.0, 1.0) * (16.0 * 256.0)), #0)
 */
static bi_index
bi_emit_texc_lod_88(bi_builder *b, bi_index lod, bool fp16)
{
   /* Precompute for constant LODs to avoid general constant folding */
   if (lod.type == BI_INDEX_CONSTANT) {
      uint32_t raw = lod.value;
      /* The constant is stored as raw float bits; decode per source width */
      float x = fp16 ? _mesa_half_to_float(raw) : uif(raw);
      int32_t s32 = CLAMP(x, -16.0f, 16.0f) * 256.0f;
      /* Mask to the low 16 bits, i.e. the 8:8 LOD field */
      return bi_imm_u32(s32 & 0xFFFF);
   }

   /* Sort of arbitrary. Must be less than 128.0, greater than or equal to
    * the max LOD (16 since we cap at 2^16 texture dimensions), and
    * preferably small to minimize precision loss */
   const float max_lod = 16.0;

   /* clamp(lod / 16, -1, 1) via an FMA with a saturating clamp mode */
   bi_instr *fsat =
      bi_fma_f32_to(b, bi_temp(b->shader), fp16 ? bi_half(lod, false) : lod,
                    bi_imm_f32(1.0f / max_lod), bi_negzero());
   fsat->clamp = BI_CLAMP_CLAMP_M1_1;

   /* Rescale to 8:8 fixed point: * (16 * 256) undoes the /16 above */
   bi_index fmul =
      bi_fma_f32(b, fsat->dest[0], bi_imm_f32(max_lod * 256.0f), bi_negzero());

   /* Keep the low 16 bits, zero the high half of the vector */
   return bi_mkvec_v2i16(b, bi_half(bi_f32_to_s32(b, fmul), false),
                         bi_imm_u16(0));
}
/* FETCH takes a 32-bit staging register containing the LOD as an integer in
 * the bottom 16-bits and (if present) the cube face index in the top 16-bits.
 * TODO: Cube face.
 */
static bi_index
bi_emit_texc_lod_cube(bi_builder *b, bi_index lod)
{
   /* Shift the integer LOD left by 8 and OR with zero to place it in the
    * field the hardware expects. NOTE(review): the shift amount of 8 (not
    * 16) looks intentional but is not obvious from the comment above —
    * presumably the LOD field is 8.8 fixed-point here; confirm against the
    * Bifrost ISA docs before changing. */
   return bi_lshift_or_i32(b, lod, bi_zero(), bi_imm_u8(8));
}
/* The hardware specifies texel offsets and multisample indices together as a
 * u8vec4 <offset, ms index>. By default all are zero, so if we have either a
 * nonzero texel offset or a nonzero multisample index, we build a u8vec4 with
 * the bits we need and return that to be passed as a staging register. Else we
 * return 0 to avoid allocating a data register when everything is zero. */
static bi_index
bi_emit_texc_offset_ms_index(bi_builder *b, nir_tex_instr *instr)
{
   bi_index dest = bi_zero();

   /* Pack up to three offset components into bytes 0..2; byte 3 is left for
    * the multisample index */
   int offs_idx = nir_tex_instr_src_index(instr, nir_tex_src_offset);
   if (offs_idx >= 0 && !nir_src_is_zero(instr->src[offs_idx].src)) {
      unsigned nr = nir_src_num_components(instr->src[offs_idx].src);
      bi_index idx = bi_src_index(&instr->src[offs_idx].src);
      /* Take the low byte of each present component, zero the rest */
      dest = bi_mkvec_v4i8(
         b, (nr > 0) ? bi_byte(bi_extract(b, idx, 0), 0) : bi_imm_u8(0),
         (nr > 1) ? bi_byte(bi_extract(b, idx, 1), 0) : bi_imm_u8(0),
         (nr > 2) ? bi_byte(bi_extract(b, idx, 2), 0) : bi_imm_u8(0),
         bi_imm_u8(0));
   }

   /* OR the sample index into the top byte (bit 24 upward) */
   int ms_idx = nir_tex_instr_src_index(instr, nir_tex_src_ms_index);
   if (ms_idx >= 0 && !nir_src_is_zero(instr->src[ms_idx].src)) {
      dest = bi_lshift_or_i32(b, bi_src_index(&instr->src[ms_idx].src), dest,
                              bi_imm_u8(24));
   }

   return dest;
}
/* Lowers a 3D cube coordinate to the hardware's <face, s, t> form: CUBEFACE
 * selects the face and major axis, CUBE_SSEL/CUBE_TSEL pick the in-face
 * coordinates, and a pair of clamped FMAs remap them to [0, 1]. Outputs are
 * written through the face/s/t pointers. */
static void
bi_emit_cube_coord(bi_builder *b, bi_index coord, bi_index *face, bi_index *s,
                   bi_index *t)
{
   /* Compute max { |x|, |y|, |z| } */
   bi_index maxxyz = bi_temp(b->shader);
   *face = bi_temp(b->shader);

   bi_index cx = bi_extract(b, coord, 0), cy = bi_extract(b, coord, 1),
            cz = bi_extract(b, coord, 2);

   /* Use a pseudo op on Bifrost due to tuple restrictions */
   if (b->shader->arch <= 8) {
      bi_cubeface_to(b, maxxyz, *face, cx, cy, cz);
   } else {
      /* Valhall splits the operation into two real instructions */
      bi_cubeface1_to(b, maxxyz, cx, cy, cz);
      bi_cubeface2_v9_to(b, *face, cx, cy, cz);
   }

   /* Select coordinates */
   bi_index ssel =
      bi_cube_ssel(b, bi_extract(b, coord, 2), bi_extract(b, coord, 0), *face);
   bi_index tsel =
      bi_cube_tsel(b, bi_extract(b, coord, 1), bi_extract(b, coord, 2), *face);

   /* The OpenGL ES specification requires us to transform an input vector
    * (x, y, z) to the coordinate, given the selected S/T:
    *
    * (1/2 ((s / max{x,y,z}) + 1), 1/2 ((t / max{x, y, z}) + 1))
    *
    * We implement (s shown, t similar) in a form friendlier to FMA
    * instructions, and clamp coordinates at the end for correct
    * NaN/infinity handling:
    *
    * fsat(s * (0.5 * (1 / max{x, y, z})) + 0.5)
    *
    * Take the reciprocal of max{x, y, z}
    */
   bi_index rcp = bi_frcp_f32(b, maxxyz);

   /* Calculate 0.5 * (1.0 / max{x, y, z}) */
   bi_index fma1 = bi_fma_f32(b, rcp, bi_imm_f32(0.5f), bi_negzero());

   /* Transform the coordinates */
   *s = bi_temp(b->shader);
   *t = bi_temp(b->shader);

   bi_instr *S = bi_fma_f32_to(b, *s, fma1, ssel, bi_imm_f32(0.5f));
   bi_instr *T = bi_fma_f32_to(b, *t, fma1, tsel, bi_imm_f32(0.5f));

   /* Saturate to [0, 1] so NaN/infinity collapse to a valid coordinate */
   S->clamp = BI_CLAMP_CLAMP_0_1;
   T->clamp = BI_CLAMP_CLAMP_0_1;
}
/* Builds the 64-bit cube map descriptor used by TEXC. The upper 32 bits (the
 * T coordinate) are written through the passed pointer; the lower 32 bits are
 * returned. Packing the face index into the top bits of S exploits the
 * redundancy of floats within the range CUBEFACE can output:
 *
 * struct cube_map_descriptor {
 *    float s : 29;
 *    unsigned face : 3;
 *    float t : 32;
 * }
 *
 * Because CUBEFACE preshifts the face index, a single bitwise MUX.i32 with a
 * constant mask merges the low 29 bits of s with the high 3 bits of face.
 */
static bi_index
bi_emit_texc_cube_coord(bi_builder *b, bi_index coord, bi_index *t)
{
   bi_index s, face;
   bi_emit_cube_coord(b, coord, &face, &s, t);

   /* Bitwise select: mask bits come from s, the rest from face */
   return bi_mux_i32(b, s, face, bi_imm_u32(BITFIELD_MASK(29)), BI_MUX_BIT);
}
/* Map to the main texture op used. Some of these (txd in particular) will
 * lower to multiple texture ops with different opcodes (GRDESC_DER + TEX in
 * sequence). We assume that lowering is handled elsewhere.
 */
static enum bifrost_tex_op
bi_tex_op(nir_texop op)
{
   switch (op) {
   /* Filtered sampling, including biased/explicit LOD and gradients */
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
   case nir_texop_txd:
      return BIFROST_TEX_OP_TEX;

   /* Unfiltered texel fetches and gathers */
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_tg4:
      return BIFROST_TEX_OP_FETCH;

   /* LOD queries read back the gradient descriptor */
   case nir_texop_lod:
      return BIFROST_TEX_OP_GRDESC;

   case nir_texop_txs:
   case nir_texop_query_levels:
   case nir_texop_texture_samples:
   case nir_texop_samples_identical:
      UNREACHABLE("should've been lowered");

   default:
      UNREACHABLE("unsupported tex op");
   }
}
/* Data registers required by texturing in the order they appear. All are
 * optional, the texture operation descriptor determines which are present.
 * Note since 3D arrays are not permitted at an API level, Z_COORD and
 * ARRAY/SHADOW are exclusive, so TEXC in practice reads at most 8 registers */
enum bifrost_tex_dreg {
   BIFROST_TEX_DREG_Z_COORD = 0,   /* Third texture coordinate (3D) */
   BIFROST_TEX_DREG_Y_DELTAS = 1,  /* Gradient data */
   BIFROST_TEX_DREG_LOD = 2,       /* Explicit LOD / bias / fetch level */
   BIFROST_TEX_DREG_GRDESC_HI = 3, /* High half of gradient descriptor */
   BIFROST_TEX_DREG_SHADOW = 4,    /* Shadow comparator */
   BIFROST_TEX_DREG_ARRAY = 5,     /* Array layer index */
   BIFROST_TEX_DREG_OFFSETMS = 6,  /* Packed texel offset + sample index */
   BIFROST_TEX_DREG_SAMPLER = 7,   /* Dynamic sampler index */
   BIFROST_TEX_DREG_TEXTURE = 8,   /* Dynamic texture index */
   BIFROST_TEX_DREG_COUNT,
};
/* Emits a full TEXC (complex texturing) operation for a NIR tex instruction:
 * builds the packed texture operation descriptor, gathers all optional data
 * registers into a contiguous staging block, chooses an index mode for the
 * texture/sampler, and handles the txd (gradient) and lod-query special
 * cases. Used on Bifrost when the simple TEXS path does not apply. */
static void
bi_emit_texc(bi_builder *b, nir_tex_instr *instr)
{
   assert((instr->op != nir_texop_txf ||
           instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) &&
          "Texel buffers should already have been lowered");

   /* Static part of the hardware texture operation descriptor */
   struct bifrost_texture_operation desc = {
      .op = bi_tex_op(instr->op),
      .offset_or_bias_disable = false, /* TODO */
      .shadow_or_clamp_disable = instr->is_shadow,
      /* LOD queries ignore the array bit even for array textures */
      .array = instr->is_array && instr->op != nir_texop_lod,
      .dimension = bifrost_tex_format(instr->sampler_dim),
      .format = bi_texture_format(instr->dest_type | instr->def.bit_size,
                                  BI_CLAMP_NONE), /* TODO */
      .mask = 0xF,
   };

   switch (desc.op) {
   case BIFROST_TEX_OP_TEX:
      /* Default to implicit LOD; may be overridden by lod/bias srcs below */
      desc.lod_or_fetch = BIFROST_LOD_MODE_COMPUTE;
      break;
   case BIFROST_TEX_OP_FETCH:
      /* For tg4, the fetch mode also encodes the gathered component */
      desc.lod_or_fetch = (enum bifrost_lod_mode)(
         instr->op == nir_texop_tg4
            ? BIFROST_TEXTURE_FETCH_GATHER4_R + instr->component
            : BIFROST_TEXTURE_FETCH_TEXEL);
      break;
   case BIFROST_TEX_OP_GRDESC:
      break;
   default:
      UNREACHABLE("texture op unsupported");
   }

   /* 32-bit indices to be allocated as consecutive staging registers */
   bi_index dregs[BIFROST_TEX_DREG_COUNT] = {};
   bi_index cx = bi_null(), cy = bi_null();
   bi_index ddx = bi_null();
   bi_index ddy = bi_null();

   /* Scatter each NIR source into coordinates, dregs, or descriptor bits */
   for (unsigned i = 0; i < instr->num_srcs; ++i) {
      bi_index index = bi_src_index(&instr->src[i].src);
      unsigned sz = nir_src_bit_size(instr->src[i].src);
      unsigned components = nir_src_num_components(instr->src[i].src);
      ASSERTED nir_alu_type base = nir_tex_instr_src_type(instr, i);
      nir_alu_type T = base | sz;

      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
         if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
            /* cx/cy carry the packed cube descriptor (s+face, t) */
            cx = bi_emit_texc_cube_coord(b, index, &cy);
         } else {
            /* Copy XY (for 2D+) or XX (for 1D) */
            cx = bi_extract(b, index, 0);
            cy = bi_extract(b, index, MIN2(1, components - 1));

            assert(components >= 1 && components <= 3);

            if (components == 3 && !desc.array) {
               /* 3D */
               dregs[BIFROST_TEX_DREG_Z_COORD] = bi_extract(b, index, 2);
            }
         }

         if (desc.array) {
            /* Array layer is the last coordinate component */
            dregs[BIFROST_TEX_DREG_ARRAY] = bi_emit_texc_array_index(
               b, bi_extract(b, index, components - 1), T);
         }

         break;

      case nir_tex_src_lod:
         if (desc.op == BIFROST_TEX_OP_TEX &&
             nir_src_is_zero(instr->src[i].src)) {
            /* LOD 0 has a dedicated mode, saving a staging register */
            desc.lod_or_fetch = BIFROST_LOD_MODE_ZERO;
         } else if (desc.op == BIFROST_TEX_OP_TEX) {
            assert(base == nir_type_float);
            assert(sz == 16 || sz == 32);

            /* Explicit float LOD goes through the 8:8 fixed-point path */
            dregs[BIFROST_TEX_DREG_LOD] =
               bi_emit_texc_lod_88(b, index, sz == 16);
            desc.lod_or_fetch = BIFROST_LOD_MODE_EXPLICIT;
         } else {
            assert(desc.op == BIFROST_TEX_OP_FETCH);
            assert(base == nir_type_uint || base == nir_type_int);
            assert(sz == 16 || sz == 32);

            /* Integer fetch level uses the FETCH staging format */
            dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_cube(b, index);
         }

         break;

      case nir_tex_src_ddx:
         /* Gradients are consumed after the loop via a GRDESC_DER op */
         ddx = index;
         break;

      case nir_tex_src_ddy:
         ddy = index;
         break;

      case nir_tex_src_bias:
         /* Upper 16-bits interpreted as a clamp, leave zero */
         assert(desc.op == BIFROST_TEX_OP_TEX);
         assert(base == nir_type_float);
         assert(sz == 16 || sz == 32);
         dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_88(b, index, sz == 16);
         desc.lod_or_fetch = BIFROST_LOD_MODE_BIAS;
         break;

      case nir_tex_src_ms_index:
      case nir_tex_src_offset:
         /* Both srcs share one packed dreg; the flag guards double emission
          * when both are present */
         if (desc.offset_or_bias_disable)
            break;

         dregs[BIFROST_TEX_DREG_OFFSETMS] =
            bi_emit_texc_offset_ms_index(b, instr);
         if (!bi_is_equiv(dregs[BIFROST_TEX_DREG_OFFSETMS], bi_zero()))
            desc.offset_or_bias_disable = true;
         break;

      case nir_tex_src_comparator:
         dregs[BIFROST_TEX_DREG_SHADOW] = index;
         break;

      case nir_tex_src_texture_offset:
         dregs[BIFROST_TEX_DREG_TEXTURE] = index;
         break;

      case nir_tex_src_sampler_offset:
         dregs[BIFROST_TEX_DREG_SAMPLER] = index;
         break;

      default:
         UNREACHABLE("Unhandled src type in texc emit");
      }
   }

   /* FETCH always needs a level; default to 0 when no lod src was given */
   if (desc.op == BIFROST_TEX_OP_FETCH &&
       bi_is_null(dregs[BIFROST_TEX_DREG_LOD])) {
      dregs[BIFROST_TEX_DREG_LOD] = bi_emit_texc_lod_cube(b, bi_zero());
   }

   /* Choose an index mode */
   bool direct_tex = bi_is_null(dregs[BIFROST_TEX_DREG_TEXTURE]);
   bool direct_samp = bi_is_null(dregs[BIFROST_TEX_DREG_SAMPLER]);
   bool direct = direct_tex && direct_samp;

   desc.immediate_indices =
      direct && (instr->sampler_index < 16 && instr->texture_index < 128);

   if (desc.immediate_indices) {
      /* Both indices fit directly in the descriptor */
      desc.sampler_index_or_mode = instr->sampler_index;
      desc.index = instr->texture_index;
   } else {
      unsigned mode = 0;

      /* Progressively fall back: shared immediate, one immediate plus one
       * register, or both indices from registers */
      if (direct && instr->sampler_index == instr->texture_index &&
          instr->sampler_index < 128) {
         mode = BIFROST_INDEX_IMMEDIATE_SHARED;
         desc.index = instr->texture_index;
      } else if (direct && instr->sampler_index < 128) {
         mode = BIFROST_INDEX_IMMEDIATE_SAMPLER;
         desc.index = instr->sampler_index;
         dregs[BIFROST_TEX_DREG_TEXTURE] =
            bi_mov_i32(b, bi_imm_u32(instr->texture_index));
      } else if (direct_tex && instr->texture_index < 128) {
         mode = BIFROST_INDEX_IMMEDIATE_TEXTURE;
         desc.index = instr->texture_index;

         if (direct_samp) {
            dregs[BIFROST_TEX_DREG_SAMPLER] =
               bi_mov_i32(b, bi_imm_u32(instr->sampler_index));
         }
      } else if (direct_samp && instr->sampler_index < 128) {
         mode = BIFROST_INDEX_IMMEDIATE_SAMPLER;
         desc.index = instr->sampler_index;

         if (direct_tex) {
            dregs[BIFROST_TEX_DREG_TEXTURE] =
               bi_mov_i32(b, bi_imm_u32(instr->texture_index));
         }
      } else {
         mode = BIFROST_INDEX_REGISTER;

         if (direct_tex) {
            dregs[BIFROST_TEX_DREG_TEXTURE] =
               bi_mov_i32(b, bi_imm_u32(instr->texture_index));
         }

         if (direct_samp) {
            dregs[BIFROST_TEX_DREG_SAMPLER] =
               bi_mov_i32(b, bi_imm_u32(instr->sampler_index));
         }
      }

      mode |= (BIFROST_TEXTURE_OPERATION_SINGLE << 2);
      desc.sampler_index_or_mode = mode;
   }

   /* txd lowering: run a GRDESC_DER op first to turn the gradients into a
    * gradient descriptor, whose first word becomes our explicit LOD */
   if (!bi_is_null(ddx) || !bi_is_null(ddy)) {
      assert(!bi_is_null(ddx) && !bi_is_null(ddy));
      struct bifrost_texture_operation gropdesc = {
         .sampler_index_or_mode = desc.sampler_index_or_mode,
         .index = desc.index,
         .immediate_indices = desc.immediate_indices,
         .op = BIFROST_TEX_OP_GRDESC_DER,
         .offset_or_bias_disable = true,
         .shadow_or_clamp_disable = true,
         .array = false,
         .dimension = desc.dimension,
         .format = desc.format,
         .mask = desc.mask,
      };

      /* Array layer / cube face carry no gradient */
      unsigned coords_comp_count =
         instr->coord_components -
         (instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE);

      /* Staging payload: ddx.z (if 3D), then ddy.x/.y/.z as present;
       * ddx.x/.y ride in the coordinate operands of the TEXC below */
      bi_index derivs[4];
      unsigned sr_count = 0;

      if (coords_comp_count > 2)
         derivs[sr_count++] = bi_extract(b, ddx, 2);
      derivs[sr_count++] = bi_extract(b, ddy, 0);
      if (coords_comp_count > 1)
         derivs[sr_count++] = bi_extract(b, ddy, 1);
      if (coords_comp_count > 2)
         derivs[sr_count++] = bi_extract(b, ddy, 2);

      bi_index derivs_packed = bi_temp(b->shader);
      bi_make_vec_to(b, derivs_packed, derivs, NULL, sr_count, 32);
      bi_index grdesc = bi_temp(b->shader);
      bi_instr *I =
         bi_texc_to(b, grdesc, derivs_packed, bi_extract(b, ddx, 0),
                    coords_comp_count > 1 ? bi_extract(b, ddx, 1) : bi_zero(),
                    bi_imm_u32(gropdesc.packed), true, sr_count, 0);
      I->register_format = BI_REGISTER_FORMAT_U32;

      bi_emit_cached_split_i32(b, grdesc, 4);

      dregs[BIFROST_TEX_DREG_LOD] = bi_extract(b, grdesc, 0);
      desc.lod_or_fetch = BIFROST_LOD_MODE_EXPLICIT;
   }

   /* Allocate staging registers contiguously by compacting the array. */
   unsigned sr_count = 0;

   for (unsigned i = 0; i < ARRAY_SIZE(dregs); ++i) {
      if (!bi_is_null(dregs[i]))
         dregs[sr_count++] = dregs[i];
   }

   /* 16-bit results pack two components per 32-bit register */
   unsigned res_size = instr->def.bit_size == 16 ? 2 : 4;

   bi_index sr = sr_count ? bi_temp(b->shader) : bi_null();

   if (sr_count)
      bi_emit_collect_to(b, sr, dregs, sr_count);

   if (instr->op == nir_texop_lod) {
      /* textureQueryLod: run GRDESC twice (clamped and raw) and convert the
       * 8:8 fixed-point LODs back to float */
      assert(instr->def.num_components == 2 && instr->def.bit_size == 32);

      bi_index res[2];

      for (unsigned i = 0; i < 2; i++) {
         desc.shadow_or_clamp_disable = i != 0;

         bi_index grdesc = bi_temp(b->shader);
         bi_instr *I = bi_texc_to(b, grdesc, sr, cx, cy,
                                  bi_imm_u32(desc.packed), false, sr_count, 0);
         I->register_format = BI_REGISTER_FORMAT_U32;

         bi_emit_cached_split_i32(b, grdesc, 4);

         bi_index lod = bi_s16_to_f32(b, bi_half(bi_extract(b, grdesc, 0), 0));

         lod = bi_fmul_f32(b, lod, bi_imm_f32(1.0f / 256));

         /* First component is the clamped, rounded LOD */
         if (i == 0)
            lod = bi_fround_f32(b, lod, BI_ROUND_NONE);

         res[i] = lod;
      }

      bi_make_vec_to(b, bi_def_index(&instr->def), res, NULL, 2, 32);
      return;
   }

   /* Regular texture op: emit the TEXC and split/collect the result */
   bi_index dst = bi_temp(b->shader);
   bi_instr *I =
      bi_texc_to(b, dst, sr, cx, cy, bi_imm_u32(desc.packed),
                 !nir_tex_instr_has_implicit_derivative(instr), sr_count, 0);
   I->register_format = bi_reg_fmt_for_nir(instr->dest_type);

   bi_index w[4] = {bi_null(), bi_null(), bi_null(), bi_null()};
   bi_emit_split_i32(b, w, dst, res_size);
   bi_emit_collect_to(b, bi_def_index(&instr->def), w,
                      DIV_ROUND_UP(instr->def.num_components * res_size, 4));
}
static void
bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *tex)
{
@ -4351,103 +3773,6 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *tex)
tex->def.num_components, tex->def.bit_size);
}
/* Simple textures ops correspond to NIR tex or txl with LOD = 0 on 2D/cube
 * textures with sufficiently small immediate indices. Anything else
 * needs a complete texture op. */
static void
bi_emit_texs(bi_builder *b, nir_tex_instr *instr)
{
   int coord_idx = nir_tex_instr_src_index(instr, nir_tex_src_coord);
   assert(coord_idx >= 0);
   bi_index coord = bi_src_index(&instr->src[coord_idx].src);

   if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      /* Lower the cube coordinate to <face, s, t> for TEXS_CUBE */
      bi_index face, s, t;
      bi_emit_cube_coord(b, coord, &face, &s, &t);

      bi_texs_cube_to(b, instr->def.bit_size, bi_def_index(&instr->def), s, t,
                      face, instr->sampler_index, instr->texture_index);
   } else {
      /* 2D path; anything that isn't nir_texop_tex forces LOD 0 */
      bi_texs_2d_to(b, instr->def.bit_size, bi_def_index(&instr->def),
                    bi_extract(b, coord, 0), bi_extract(b, coord, 1),
                    instr->op != nir_texop_tex, /* zero LOD */
                    instr->sampler_index, instr->texture_index);
   }

   bi_split_def(b, &instr->def);
}
/* Decides whether a NIR tex instruction can use the short TEXS encoding:
 * float tex/txl with LOD 0 on a non-shadow, non-array 2D/cube texture whose
 * sampler and texture indices fit the descriptor's immediate fields. */
static bool
bi_is_simple_tex(nir_tex_instr *instr)
{
   if (instr->op != nir_texop_tex && instr->op != nir_texop_txl)
      return false;

   if (instr->dest_type != nir_type_float32 &&
       instr->dest_type != nir_type_float16)
      return false;

   if (instr->is_shadow || instr->is_array)
      return false;

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_CUBE:
      /* LOD can't be specified with TEXS_CUBE */
      if (instr->op == nir_texop_txl)
         return false;
      break;

   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_EXTERNAL:
   case GLSL_SAMPLER_DIM_RECT:
      break;

   default:
      return false;
   }

   /* Only coordinate and LOD sources are representable */
   for (unsigned i = 0; i < instr->num_srcs; ++i) {
      nir_tex_src_type src = instr->src[i].src_type;

      if (src != nir_tex_src_coord && src != nir_tex_src_lod)
         return false;
   }

   /* Indices need to fit in provided bits */
   unsigned idx_bits = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE ? 2 : 3;
   if (MAX2(instr->sampler_index, instr->texture_index) >= (1 << idx_bits))
      return false;

   /* Any present LOD must be the constant zero */
   int lod_idx = nir_tex_instr_src_index(instr, nir_tex_src_lod);
   return lod_idx < 0 || nir_src_is_zero(instr->src[lod_idx].src);
}
/* Top-level texture emission: dispatches to the Valhall path, the short TEXS
 * encoding, or the full TEXC encoding. */
static void
bi_emit_tex(bi_builder *b, nir_tex_instr *instr)
{
   /* If txf is used, we assume there is a valid sampler bound at index 0. Use
    * it for txf operations, since there may be no other valid samplers. This is
    * a workaround: txf does not require a sampler in NIR (so sampler_index is
    * undefined) but we need one in the hardware. This is ABI with the driver.
    *
    * On Valhall, as the descriptor table is encoded in the index, this should
    * be handled by the driver.
    */
   if (b->shader->arch < 9 && !nir_tex_instr_need_sampler(instr))
      instr->sampler_index = 0;

   if (b->shader->arch >= 9) {
      bi_emit_tex_valhall(b, instr);
   } else if (bi_is_simple_tex(instr)) {
      bi_emit_texs(b, instr);
   } else {
      bi_emit_texc(b, instr);
   }
}
static void
bi_emit_phi(bi_builder *b, nir_phi_instr *instr)
{
@ -4514,7 +3839,8 @@ bi_emit_instr(bi_builder *b, struct nir_instr *instr)
break;
case nir_instr_type_tex:
bi_emit_tex(b, nir_instr_as_tex(instr));
assert(b->shader->arch >= 9);
bi_emit_tex_valhall(b, nir_instr_as_tex(instr));
break;
case nir_instr_type_jump:

View file

@ -943,8 +943,7 @@ bifrost_postprocess_nir(nir_shader *nir,
&info->vs.needs_extended_fifo);
}
if (pan_arch(gpu_id) >= 9)
NIR_PASS(_, nir, pan_nir_lower_tex, gpu_id);
NIR_PASS(_, nir, pan_nir_lower_tex, gpu_id);
/* Our OpenCL compiler (src/panfrost/clc/pan_compile.c) has a very weird and
* suboptimal optimization pipeline that results in a lot of unoptimized