nvir: introduce OP_BREV with lowering to EXTBF_REV for current GPUs

SM70 has the BREV instruction, but no BFE.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5377>
This commit is contained in:
Ben Skeggs 2020-06-07 09:51:51 +10:00 committed by Marge Bot
parent ddedfcdf21
commit 60b28f7a50
8 changed files with 29 additions and 12 deletions

View file

@ -152,6 +152,7 @@ enum operation
OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
OP_BFIND, // find highest/lowest set bit
OP_BREV, // bitfield reverse
OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
OP_ATOM,
OP_BAR, // execution barrier, sources = { id, thread count, predicate }

View file

@ -1910,7 +1910,7 @@ Converter::visit(nir_intrinsic_instr *insn)
if (op == nir_intrinsic_read_first_invocation) {
mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
mkOp1(OP_BREV, TYPE_U32, tmp, tmp);
mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
} else
tmp = getSrc(&insn->src[1], 0);
@ -2794,14 +2794,14 @@ Converter::visit(nir_alu_instr *insn)
case nir_op_bitfield_reverse: {
DEFAULT_CHECKS;
LValues &newDefs = convert(&insn->dest);
mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0]));
break;
}
case nir_op_find_lsb: {
DEFAULT_CHECKS;
LValues &newDefs = convert(&insn->dest);
Value *tmp = getSSA();
mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0]));
mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
break;
}

View file

@ -3401,8 +3401,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
// ReadInvocationARB(src, findLSB(ballot(true)))
val0 = getScratch();
mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000))
->subOp = NV50_IR_SUBOP_EXTBF_REV;
mkOp1(OP_BREV, TYPE_U32, val0, val0);
mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
src1 = val0;
/* fallthrough */
@ -3820,8 +3819,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
val0 = getScratch();
geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000));
geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
mkOp1(OP_BREV, TYPE_U32, val0, src0);
geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0);
geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
}
@ -3836,8 +3834,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
case TGSI_OPCODE_BREV:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
mkOp1(OP_BREV, TYPE_U32, dst0[c], src0);
}
break;
case TGSI_OPCODE_POPC:

View file

@ -310,6 +310,14 @@ NVC0LegalizeSSA::handleSET(CmpInstruction *cmp)
cmp->sType = hTy;
}
// Lower OP_BREV (bitfield reverse) in place: the instruction is rewritten to
// OP_EXTBF with the NV50_IR_SUBOP_EXTBF_REV sub-op. Source 0 is left untouched.
void
NVC0LegalizeSSA::handleBREV(Instruction *i)
{
i->op = OP_EXTBF;
i->subOp = NV50_IR_SUBOP_EXTBF_REV;
// EXTBF expects src1 = 0xNNKK (N bits starting at bit K); 0x2000 therefore
// selects N = 0x20 (32) bits starting at K = 0.
i->setSrc(1, bld.mkImm(0x2000));
}
bool
NVC0LegalizeSSA::visit(Function *fn)
{
@ -354,6 +362,9 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
if (typeSizeof(i->sType) == 8 && i->sType != TYPE_F64)
handleSET(i->asCmp());
break;
case OP_BREV:
handleBREV(i);
break;
default:
break;
}

View file

@ -68,6 +68,7 @@ private:
void handleSET(CmpInstruction *);
void handleTEXLOD(TexInstruction *);
void handleShift(Instruction *);
void handleBREV(Instruction *);
protected:
BuildUtil bld;

View file

@ -1534,6 +1534,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
i->subOp = 0;
break;
}
case OP_BREV: {
uint32_t res = util_bitreverse(imm0.reg.data.u32);
i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res));
i->op = OP_MOV;
break;
}
case OP_POPCNT: {
// Only deal with 1-arg POPCNT here
if (i->srcExists(1))

View file

@ -178,6 +178,7 @@ const char *operationStr[OP_LAST + 1] =
"insbf",
"extbf",
"bfind",
"brev",
"permt",
"atom",
"bar",

View file

@ -51,7 +51,7 @@ const uint8_t Target::operationSrcNr[] =
0, // TEXBAR
1, 1, // DFDX, DFDY
1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT
2, 3, 2, 1, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, BREV, PERMT
2, 2, // ATOM, BAR
2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL
@ -120,9 +120,9 @@ const OpClass Target::operationClass[] =
// DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
// POPCNT, INSBF, EXTBF, BFIND; PERMT
// POPCNT, INSBF, EXTBF, BFIND, BREV; PERMT
OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
OPCLASS_BITFIELD,
OPCLASS_BITFIELD, OPCLASS_BITFIELD,
// ATOM, BAR
OPCLASS_ATOMIC, OPCLASS_CONTROL,
// VADD, VAVG, VMIN, VMAX