nvir: introduce OP_BMSK

This replaces the existing implementation without adding lowering for earlier GPUs. The reason is that the existing code isn't at all correct, and it can't be hit anyway.

OP_BMSK will be required to support SM70 lowering passes.

v2:
- fixup source selection

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5377>
2026-05-05 07:28:11 +02:00 · 2020-06-07 09:51:55 +10:00 · 2020-06-07 09:51:55 +10:00 · 6fd41da1ef
commit 6fd41da1ef
parent e1e4d1d373
5 changed files with 11 additions and 4 deletions
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@ -154,6 +154,7 @@ enum operation
   OP_EXTBF,  // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
   OP_BFIND,  // find highest/lowest set bit
   OP_BREV,   // bitfield reverse
+   OP_BMSK,   // bitfield mask
   OP_PERMT,  // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
   OP_ATOM,
   OP_BAR,    // execution barrier, sources = { id, thread count, predicate }
@ -267,6 +268,8 @@ enum operation
      uint8_t c = NV50_IR_SUBOP_LOP3_LUT_SRC2; \
      (uint8_t)(exp);                          \
 })
+#define NV50_IR_SUBOP_BMSK_C (0 << 0)
+#define NV50_IR_SUBOP_BMSK_W (1 << 0)

 #define NV50_IR_SUBOP_MINMAX_LOW  1
 #define NV50_IR_SUBOP_MINMAX_MED  2
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@ -2774,7 +2774,7 @@ Converter::visit(nir_alu_instr *insn)
   case nir_op_bfm: {
      DEFAULT_CHECKS;
      LValues &newDefs = convert(&insn->dest);
-      mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
+      mkOp2(OP_BMSK, dType, newDefs[0], getSrc(&insn->src[1]), getSrc(&insn->src[0]))->subOp = NV50_IR_SUBOP_BMSK_W;
      break;
   }
   case nir_op_bitfield_insert: {
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@ -558,6 +558,9 @@ ConstantFolding::expr(Instruction *i,
   memset(&res.data, 0, sizeof(res.data));

   switch (i->op) {
+   case OP_BMSK:
+      res.data.u32 = ((1 << b->data.u32) - 1) << a->data.u32;
+      break;
   case OP_MAD:
   case OP_FMA:
   case OP_MUL:
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@ -180,6 +180,7 @@ const char *operationStr[OP_LAST + 1] =
   "extbf",
   "bfind",
   "brev",
+   "bmsk",
   "permt",
   "atom",
   "bar",
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@ -51,7 +51,7 @@ const uint8_t Target::operationSrcNr[] =
   0,                      // TEXBAR
   1, 1,                   // DFDX, DFDY
   1, 2, 1, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
-   2, 3, 2, 1, 1, 3,       // POPCNT, INSBF, EXTBF, BFIND, BREV, PERMT
+   2, 3, 2, 1, 1, 2, 3,    // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK, PERMT
   2, 2,                   // ATOM, BAR
   2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
   2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
@ -120,9 +120,9 @@ const OpClass Target::operationClass[] =
   // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
   OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
-   // POPCNT, INSBF, EXTBF, BFIND, BREV; PERMT
+   // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK; PERMT
   OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
-   OPCLASS_BITFIELD, OPCLASS_BITFIELD,
+   OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
   // ATOM, BAR
   OPCLASS_ATOMIC, OPCLASS_CONTROL,
   // VADD, VAVG, VMIN, VMAX