aco: mostly implement FS input loads on GFX11

Quad-divergent control flow and vertex selection don't work yet, but this should at least prevent crashes. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17333>
2025-12-22 00:30:13 +01:00 · 2022-06-20 18:11:07 +01:00 · 2022-06-20 18:11:07 +01:00 · 3730be9873
commit 3730be9873
parent 826ed52174
1 changed files with 47 additions and 5 deletions
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@ -5262,10 +5262,38 @@ visit_store_output(isel_context* ctx, nir_intrinsic_instr* instr)
   }
 }
 void
 emit_interp_instr_gfx11(isel_context* ctx, unsigned idx, unsigned component, Temp src, Temp dst,
                        Temp prim_mask)
 {
   Temp coord1 = emit_extract_vector(ctx, src, 0, v1);
   Temp coord2 = emit_extract_vector(ctx, src, 1, v1);
   Builder bld(ctx->program, ctx->block);
   //TODO: this doesn't work in quad-divergent control flow
   Temp p = bld.ldsdir(aco_opcode::lds_param_load, bld.def(v1), bld.m0(prim_mask), idx, component);
   if (dst.regClass() == v2b) {
      Temp p10 =
         bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, bld.def(v1), p, coord1, p);
      bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, Definition(dst), p, coord2, p10);
   } else {
      Temp p10 = bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, bld.def(v1), p, coord1, p);
      bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, Definition(dst), p, coord2, p10);
   }
 }
 void
 emit_interp_instr(isel_context* ctx, unsigned idx, unsigned component, Temp src, Temp dst,
                  Temp prim_mask)
 {
   if (ctx->options->gfx_level >= GFX11) {
      emit_interp_instr_gfx11(ctx, idx, component, src, dst, prim_mask);
      return;
   }
   Temp coord1 = emit_extract_vector(ctx, src, 0, v1);
   Temp coord2 = emit_extract_vector(ctx, src, 1, v1);
@ -5304,6 +5332,22 @@ emit_interp_instr(isel_context* ctx, unsigned idx, unsigned component, Temp src,
   }
 }
 void
 emit_interp_mov_instr(isel_context* ctx, unsigned idx, unsigned component, unsigned vertex_id,
                      Temp dst, Temp prim_mask)
 {
   Builder bld(ctx->program, ctx->block);
   if (ctx->options->gfx_level >= GFX11) {
      //TODO: this doesn't work in quad-divergent control flow and ignores vertex_id
      Temp p = bld.ldsdir(aco_opcode::lds_param_load, bld.def(v1), bld.m0(prim_mask), idx, component);
      uint16_t dpp_ctrl = dpp_quad_perm(0, 0, 0, 0);
      bld.vop1_dpp(aco_opcode::v_mov_b32, Definition(dst), p, dpp_ctrl);
   } else {
      bld.vintrp(aco_opcode::v_interp_mov_f32, Definition(dst), Operand::c32(vertex_id),
                 bld.m0(prim_mask), idx, component);
   }
 }
 void
 emit_load_frag_coord(isel_context* ctx, Temp dst, unsigned num_components)
 {
@ -5720,8 +5764,7 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
      if (instr->dest.ssa.num_components == 1 &&
          instr->dest.ssa.bit_size != 64) {
-         bld.vintrp(aco_opcode::v_interp_mov_f32, Definition(dst), Operand::c32(vertex_id),
+         emit_interp_mov_instr(ctx, idx, component, vertex_id, dst, prim_mask);
                    bld.m0(prim_mask), idx, component);
      } else {
         unsigned num_components = instr->dest.ssa.num_components;
         if (instr->dest.ssa.bit_size == 64)
@ -5731,9 +5774,8 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
         for (unsigned i = 0; i < num_components; i++) {
            unsigned chan_component = (component + i) % 4;
            unsigned chan_idx = idx + (component + i) / 4;
-            vec->operands[i] = bld.vintrp(
+            vec->operands[i] = Operand(bld.tmp(instr->dest.ssa.bit_size == 16 ? v2b : v1));
-               aco_opcode::v_interp_mov_f32, bld.def(instr->dest.ssa.bit_size == 16 ? v2b : v1),
+            emit_interp_mov_instr(ctx, chan_idx, chan_component, vertex_id, vec->operands[i].getTemp(), prim_mask);
               Operand::c32(vertex_id), bld.m0(prim_mask), chan_idx, chan_component);
         }
         vec->definitions[0] = Definition(dst);
         bld.insert(std::move(vec));