mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 22:49:13 +02:00
nvir: introduce OP_BREV with lowering to EXTBF_REV for current GPUs
SM70 has this instruction, but no BFE. Signed-off-by: Ben Skeggs <bskeggs@redhat.com> Reviewed-by: Karol Herbst <kherbst@redhat.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5377>
This commit is contained in:
parent
ddedfcdf21
commit
60b28f7a50
8 changed files with 29 additions and 12 deletions
|
|
@ -152,6 +152,7 @@ enum operation
|
|||
OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
|
||||
OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
|
||||
OP_BFIND, // find highest/lowest set bit
|
||||
OP_BREV, // bitfield reverse
|
||||
OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
|
||||
OP_ATOM,
|
||||
OP_BAR, // execution barrier, sources = { id, thread count, predicate }
|
||||
|
|
|
|||
|
|
@ -1910,7 +1910,7 @@ Converter::visit(nir_intrinsic_instr *insn)
|
|||
|
||||
if (op == nir_intrinsic_read_first_invocation) {
|
||||
mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
|
||||
mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
|
||||
mkOp1(OP_BREV, TYPE_U32, tmp, tmp);
|
||||
mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
|
||||
} else
|
||||
tmp = getSrc(&insn->src[1], 0);
|
||||
|
|
@ -2794,14 +2794,14 @@ Converter::visit(nir_alu_instr *insn)
|
|||
case nir_op_bitfield_reverse: {
|
||||
DEFAULT_CHECKS;
|
||||
LValues &newDefs = convert(&insn->dest);
|
||||
mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
|
||||
mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0]));
|
||||
break;
|
||||
}
|
||||
case nir_op_find_lsb: {
|
||||
DEFAULT_CHECKS;
|
||||
LValues &newDefs = convert(&insn->dest);
|
||||
Value *tmp = getSSA();
|
||||
mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
|
||||
mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0]));
|
||||
mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3401,8 +3401,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
|||
// ReadInvocationARB(src, findLSB(ballot(true)))
|
||||
val0 = getScratch();
|
||||
mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
|
||||
mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000))
|
||||
->subOp = NV50_IR_SUBOP_EXTBF_REV;
|
||||
mkOp1(OP_BREV, TYPE_U32, val0, val0);
|
||||
mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
|
||||
src1 = val0;
|
||||
/* fallthrough */
|
||||
|
|
@ -3820,8 +3819,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
|||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
|
||||
src0 = fetchSrc(0, c);
|
||||
val0 = getScratch();
|
||||
geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000));
|
||||
geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
|
||||
mkOp1(OP_BREV, TYPE_U32, val0, src0);
|
||||
geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0);
|
||||
geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
|
||||
}
|
||||
|
|
@ -3836,8 +3834,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
|||
case TGSI_OPCODE_BREV:
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
|
||||
src0 = fetchSrc(0, c);
|
||||
geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
|
||||
geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
|
||||
mkOp1(OP_BREV, TYPE_U32, dst0[c], src0);
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_POPC:
|
||||
|
|
|
|||
|
|
@ -310,6 +310,14 @@ NVC0LegalizeSSA::handleSET(CmpInstruction *cmp)
|
|||
cmp->sType = hTy;
|
||||
}
|
||||
|
||||
// Lowers an OP_BREV instruction in place: the opcode becomes OP_EXTBF with
// the NV50_IR_SUBOP_EXTBF_REV sub-operation, and source 1 is set to the
// immediate 0x2000. Per the OP_EXTBF operand layout (src1 = 0xNNKK) that
// immediate encodes N = 0x20 bits starting at bit K = 0.
// NOTE(review): presumably EXTBF_REV is the bit-reversing form of the
// extract, matching what the converters emitted directly before OP_BREV
// existed -- confirm against the emitter.
void
|
||||
NVC0LegalizeSSA::handleBREV(Instruction *i)
|
||||
{
|
||||
i->op = OP_EXTBF;
|
||||
i->subOp = NV50_IR_SUBOP_EXTBF_REV;
|
||||
i->setSrc(1, bld.mkImm(0x2000));
|
||||
}
|
||||
|
||||
bool
|
||||
NVC0LegalizeSSA::visit(Function *fn)
|
||||
{
|
||||
|
|
@ -354,6 +362,9 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
|
|||
if (typeSizeof(i->sType) == 8 && i->sType != TYPE_F64)
|
||||
handleSET(i->asCmp());
|
||||
break;
|
||||
case OP_BREV:
|
||||
handleBREV(i);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -68,6 +68,7 @@ private:
|
|||
void handleSET(CmpInstruction *);
|
||||
void handleTEXLOD(TexInstruction *);
|
||||
void handleShift(Instruction *);
|
||||
void handleBREV(Instruction *);
|
||||
|
||||
protected:
|
||||
BuildUtil bld;
|
||||
|
|
|
|||
|
|
@ -1534,6 +1534,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
|
|||
i->subOp = 0;
|
||||
break;
|
||||
}
|
||||
case OP_BREV: {
|
||||
uint32_t res = util_bitreverse(imm0.reg.data.u32);
|
||||
i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res));
|
||||
i->op = OP_MOV;
|
||||
break;
|
||||
}
|
||||
case OP_POPCNT: {
|
||||
// Only deal with 1-arg POPCNT here
|
||||
if (i->srcExists(1))
|
||||
|
|
|
|||
|
|
@ -178,6 +178,7 @@ const char *operationStr[OP_LAST + 1] =
|
|||
"insbf",
|
||||
"extbf",
|
||||
"bfind",
|
||||
"brev",
|
||||
"permt",
|
||||
"atom",
|
||||
"bar",
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ const uint8_t Target::operationSrcNr[] =
|
|||
0, // TEXBAR
|
||||
1, 1, // DFDX, DFDY
|
||||
1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
|
||||
2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT
|
||||
2, 3, 2, 1, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, BREV, PERMT
|
||||
2, 2, // ATOM, BAR
|
||||
2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
|
||||
2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL
|
||||
|
|
@ -120,9 +120,9 @@ const OpClass Target::operationClass[] =
|
|||
// DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
|
||||
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
|
||||
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
|
||||
// POPCNT, INSBF, EXTBF, BFIND; PERMT
|
||||
// POPCNT, INSBF, EXTBF, BFIND, BREV; PERMT
|
||||
OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
|
||||
OPCLASS_BITFIELD,
|
||||
OPCLASS_BITFIELD, OPCLASS_BITFIELD,
|
||||
// ATOM, BAR
|
||||
OPCLASS_ATOMIC, OPCLASS_CONTROL,
|
||||
// VADD, VAVG, VMIN, VMAX
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue