pan/bi,va: Use dedicated LD_VAR_BUF_FLAT* opcodes on v14+

On v14+, the LD_VAR_BUF and LD_VAR_BUF_IMM opcodes no longer support flat source formats. Make the compiler emit the dedicated LD_VAR_BUF_FLAT* opcodes instead: add the ISA definitions, handle the new opcodes in the packer and in the HSR info walk, and add packing tests for both the immediate and indirect forms.

Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
2026-05-08 09:08:10 +02:00 · 2026-03-31 13:05:04 +02:00 · 2026-03-31 13:05:04 +02:00 · 95596dbc0c
commit 95596dbc0c
parent 6dedfd66a4
5 changed files with 80 additions and 5 deletions
--- a/src/panfrost/compiler/bifrost/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost/bifrost_compile.c
@@ -703,8 +703,10 @@ bi_emit_load_var_buf(bi_builder *b, nir_intrinsic_instr *intr)
   assert(intr->intrinsic == nir_intrinsic_load_var_buf_pan ||
          intr->intrinsic == nir_intrinsic_load_var_buf_flat_pan);

+   const unsigned arch = b->shader->arch;
+
   /* These are only available on Valhall+ */
-   assert(b->shader->arch >= 9);
+   assert(arch >= 9);

   const bool flat = intr->intrinsic == nir_intrinsic_load_var_buf_flat_pan;
   const nir_alu_type src_type = nir_intrinsic_src_type(intr);
@@ -757,19 +759,36 @@ bi_emit_load_var_buf(bi_builder *b, nir_intrinsic_instr *intr)
   bool use_imm_form = false;
   if (nir_src_is_const(intr->src[0])) {
      imm_offset = nir_src_as_uint(intr->src[0]);
-      assert(imm_offset < pan_ld_var_buf_off_size(b->shader->arch));
+      assert(imm_offset < pan_ld_var_buf_off_size(arch));

      use_imm_form = true;
   }

+   /* On v14+, flat source formats are removed from LD_VAR_BUF/LD_VAR_BUF_IMM,
+    * so flat buffer varyings must use the dedicated LD_VAR_BUF_FLAT*.
+    */
   if (use_imm_form) {
-      bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, sample, source_format,
+      if (arch >= 14 && flat) {
+         bi_ld_var_buf_flat_imm_to(b, dest, regfmt, vecsize, imm_offset);
+      } else {
+         bi_ld_var_buf_imm_to(b, sz, dest, src0, regfmt, sample, source_format,
                           BI_UPDATE_STORE, vecsize, imm_offset);
+      }
   } else {
      bi_index offset = bi_src_index(&intr->src[0]);
-      bi_ld_var_buf_to(b, sz, dest, src0, offset, regfmt, sample,
-                       source_format, BI_UPDATE_STORE, vecsize);
+      if (arch >= 14 && flat) {
+         bi_ld_var_buf_flat_to(b, dest, offset, regfmt, vecsize);
+      } else {
+         bi_ld_var_buf_to(b, sz, dest, src0, offset, regfmt, sample,
+                          source_format, BI_UPDATE_STORE, vecsize);
+      }
   }
+
+   /* LD_VAR_BUF_FLAT* only support register formats F16 and F32. */
+   assert(
+      arch < 14 || !flat ||
+      (regfmt == BI_REGISTER_FORMAT_F16 || regfmt == BI_REGISTER_FORMAT_F32));
+
   bi_split_def(b, &intr->def);
 }

--- a/src/panfrost/compiler/bifrost/valhall/ISA.xml
+++ b/src/panfrost/compiler/bifrost/valhall/ISA.xml
@@ -940,6 +940,32 @@
    <imm name="index" start="12" size="4"/> <!-- 0 for pointx, 1 for pointy, 2 for fragw, 3 for fragz -->
  </ins>

+  <ins name="LD_VAR_BUF_FLAT_IMM" title="Load immediate flat varying" message="varying" unit="V">
+    <opcode>
+      <op val="0x40" start="48" mask="0x1FF"/>
+    </opcode>
+    <desc>Fetches a given flat varying from hardware buffer</desc>
+    <slot/>
+    <vecsize/>
+    <regfmt/>
+    <sr write="true"/>
+    <sr_count count="format"/>
+    <imm name="index" start="8" size="11"/>
+  </ins>
+
+  <ins name="LD_VAR_BUF_FLAT" title="Load indirect flat varying" message="varying" unit="V">
+    <opcode>
+      <op val="0x5F" start="48" mask="0x1FF"/>
+    </opcode>
+    <desc>Fetches a given flat varying from hardware buffer</desc>
+    <slot/>
+    <vecsize/>
+    <regfmt/>
+    <sr write="true"/>
+    <sr_count count="format"/>
+    <src/>
+  </ins>
+
  <group name="LD_VAR_BUF_IMM" title="Load immediate varying" message="varying" unit="V">
    <desc>Interpolates a given varying from hardware buffer</desc>
    <ins name="LD_VAR_BUF_IMM.f32">
--- a/src/panfrost/compiler/bifrost/valhall/test/test-packing.cpp
+++ b/src/panfrost/compiler/bifrost/valhall/test/test-packing.cpp
@@ -294,6 +294,30 @@ TEST_F(ValhallPacking, LdVarBufImmF16)
             11, 0x005d80443300083d);
 }

+TEST_F(ValhallPacking, LdVarBufFlatImmFormat)
+{
+   CASE_ARCH(bi_ld_var_buf_flat_imm_to(b, bi_register(0),
+                                       BI_REGISTER_FORMAT_F32,
+                                       BI_VECSIZE_V4, 0x12),
+             14, 0x0040800832001200);
+
+   CASE_ARCH(bi_ld_var_buf_flat_imm_to(b, bi_register(0),
+                                       BI_REGISTER_FORMAT_F16,
+                                       BI_VECSIZE_V4, 0x12),
+             14, 0x0040800433001200);
+}
+
+TEST_F(ValhallPacking, LdVarBufFlat)
+{
+   CASE_ARCH(bi_ld_var_buf_flat_to(b, bi_register(0), bi_register(61),
+                                   BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4),
+             14, 0x005f80083200003d);
+
+   CASE_ARCH(bi_ld_var_buf_flat_to(b, bi_register(0), bi_register(61),
+                                   BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4),
+             14, 0x005f80043300003d);
+}
+
 TEST_F(ValhallPacking, LeaBufImm)
 {
   CASE(bi_lea_buf_imm_to(b, bi_register(4), bi_discard(bi_register(59))),
--- a/src/panfrost/compiler/bifrost/valhall/va_gather_hsr_info.c
+++ b/src/panfrost/compiler/bifrost/valhall/va_gather_hsr_info.c
@@ -77,6 +77,8 @@ walk_bir_shader(bi_context *ctx, struct pan_shader_info *info)
            if (instr->sample == BI_SAMPLE_CENTROID)
               info->fs.hsr.centroid_interpolation = true;
            FALLTHROUGH;
+         case BI_OPCODE_LD_VAR_BUF_FLAT:
+         case BI_OPCODE_LD_VAR_BUF_FLAT_IMM:
         case BI_OPCODE_LD_VAR_FLAT:
         case BI_OPCODE_LD_VAR_FLAT_IMM:
            if (!found_atest)
--- a/src/panfrost/compiler/bifrost/valhall/va_pack.c
+++ b/src/panfrost/compiler/bifrost/valhall/va_pack.c
@@ -566,6 +566,10 @@ va_pack_alu(const bi_instr *I, unsigned arch)
      hex |= ((uint64_t)I->sample) << 38;
      break;

+   case BI_OPCODE_LD_VAR_BUF_FLAT_IMM:
+      hex |= ((uint64_t)I->index) << 8;
+      break;
+
   case BI_OPCODE_LD_ATTR_IMM:
      hex |= ((uint64_t)I->table) << 16;
      hex |= ((uint64_t)I->attribute_index) << 20;