mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-11 22:10:30 +01:00
nv50,nvc0: handle SQRT lowering inside the driver
First off, st/mesa lowers DSQRT incorrectly (it uses CMP to attempt to find out whether the input is less than 0). Secondly, the current approach (x * rsq(x)) behaves poorly for x = inf: rsq(inf) is 0, so the product inf * 0 yields a NaN instead of inf. Instead we switch to the less accurate rcp(rsq(x)) method, which behaves nicely for all valid inputs. We still don't do this for DSQRT since the RSQ/RCP ops are *really* inaccurate, and don't even have Newton-Raphson steps right now. Eventually we should have a separate library function for DSQRT that does it more precisely (and perhaps move this lowering to the post-opt phase). This fixes a number of dEQP precision tests that were expecting better behavior for infinite inputs. Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Tested-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
parent
b3e7fb5234
commit
c1e4a6bfbf
6 changed files with 27 additions and 23 deletions
|
|
@ -585,6 +585,7 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
|
|||
return NULL;
|
||||
srcNr = 2;
|
||||
break;
|
||||
case OP_SELP: srcNr = 3; break;
|
||||
default:
|
||||
// TODO when needed
|
||||
return NULL;
|
||||
|
|
@ -601,7 +602,10 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
|
|||
|
||||
for (int s = 0; s < srcNr; ++s) {
|
||||
if (lo->getSrc(s)->reg.size < 8) {
|
||||
hi->setSrc(s, zero);
|
||||
if (s == 2)
|
||||
hi->setSrc(s, lo->getSrc(s));
|
||||
else
|
||||
hi->setSrc(s, zero);
|
||||
} else {
|
||||
if (lo->getSrc(s)->refCount() > 1)
|
||||
lo->setSrc(s, cloneShallow(fn, lo->getSrc(s)));
|
||||
|
|
|
|||
|
|
@ -616,6 +616,7 @@ static nv50_ir::operation translateOpcode(uint opcode)
|
|||
|
||||
NV50_IR_OPCODE_CASE(RCP, RCP);
|
||||
NV50_IR_OPCODE_CASE(RSQ, RSQ);
|
||||
NV50_IR_OPCODE_CASE(SQRT, SQRT);
|
||||
|
||||
NV50_IR_OPCODE_CASE(MUL, MUL);
|
||||
NV50_IR_OPCODE_CASE(ADD, ADD);
|
||||
|
|
@ -2689,6 +2690,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
|||
case TGSI_OPCODE_FLR:
|
||||
case TGSI_OPCODE_TRUNC:
|
||||
case TGSI_OPCODE_RCP:
|
||||
case TGSI_OPCODE_SQRT:
|
||||
case TGSI_OPCODE_IABS:
|
||||
case TGSI_OPCODE_INEG:
|
||||
case TGSI_OPCODE_NOT:
|
||||
|
|
|
|||
|
|
@ -1203,10 +1203,9 @@ NV50LoweringPreSSA::handleDIV(Instruction *i)
|
|||
bool
|
||||
NV50LoweringPreSSA::handleSQRT(Instruction *i)
|
||||
{
|
||||
Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32,
|
||||
bld.getSSA(), i->getSrc(0));
|
||||
i->op = OP_MUL;
|
||||
i->setSrc(1, rsq->getDef(0));
|
||||
bld.setPosition(i, true);
|
||||
i->op = OP_RSQ;
|
||||
bld.mkOp1(OP_RCP, i->dType, i->getDef(0), i->getDef(0));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1778,22 +1778,21 @@ NVC0LoweringPass::handleMOD(Instruction *i)
|
|||
bool
|
||||
NVC0LoweringPass::handleSQRT(Instruction *i)
|
||||
{
|
||||
Value *pred = bld.getSSA(1, FILE_PREDICATE);
|
||||
Value *zero = bld.getSSA();
|
||||
Instruction *rsq;
|
||||
|
||||
bld.mkOp1(OP_MOV, TYPE_U32, zero, bld.mkImm(0));
|
||||
if (i->dType == TYPE_F64)
|
||||
zero = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), zero, zero);
|
||||
bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero);
|
||||
bld.mkOp1(OP_MOV, i->dType, i->getDef(0), zero)->setPredicate(CC_P, pred);
|
||||
rsq = bld.mkOp1(OP_RSQ, i->dType,
|
||||
bld.getSSA(typeSizeof(i->dType)), i->getSrc(0));
|
||||
rsq->setPredicate(CC_NOT_P, pred);
|
||||
i->op = OP_MUL;
|
||||
i->setSrc(1, rsq->getDef(0));
|
||||
i->setPredicate(CC_NOT_P, pred);
|
||||
|
||||
if (i->dType == TYPE_F64) {
|
||||
Value *pred = bld.getSSA(1, FILE_PREDICATE);
|
||||
Value *zero = bld.loadImm(NULL, 0.0d);
|
||||
Value *dst = bld.getSSA(8);
|
||||
bld.mkOp1(OP_RSQ, i->dType, dst, i->getSrc(0));
|
||||
bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero);
|
||||
bld.mkOp3(OP_SELP, TYPE_U64, dst, zero, dst, pred);
|
||||
i->op = OP_MUL;
|
||||
i->setSrc(1, dst);
|
||||
// TODO: Handle this properly with a library function
|
||||
} else {
|
||||
bld.setPosition(i, true);
|
||||
i->op = OP_RSQ;
|
||||
bld.mkOp1(OP_RCP, i->dType, i->getDef(0), i->getDef(0));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -305,7 +305,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
|
||||
return 0;
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_SUBROUTINES:
|
||||
return 0; /* please inline, or provide function declarations */
|
||||
case PIPE_SHADER_CAP_INTEGERS:
|
||||
|
|
|
|||
|
|
@ -328,7 +328,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
|||
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
|
||||
return 0;
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_SUBROUTINES:
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_INTEGERS:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue