mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 11:30:11 +01:00
intel/fs: Add a scale factor to emit_fsign
Normally fsign generates -1, 0, or +1. The new scale factor, S, causes
fsign to generate -S, 0, or +S.
v2: Rebase on v2 changes in previous commit.
v3: Rebase on 85c35885b3 ("nir: Rework nir_src_as_alu_instr to not take
a pointer").
Reviewed-by: Matt Turner <mattst88@gmail.com> [v2]
This commit is contained in:
parent
ad98fbc217
commit
06d2c11641
2 changed files with 77 additions and 12 deletions
|
|
@ -188,7 +188,7 @@ public:
|
||||||
fs_reg resolve_source_modifiers(const fs_reg &src);
|
fs_reg resolve_source_modifiers(const fs_reg &src);
|
||||||
void emit_discard_jump();
|
void emit_discard_jump();
|
||||||
void emit_fsign(const class brw::fs_builder &, const nir_alu_instr *instr,
|
void emit_fsign(const class brw::fs_builder &, const nir_alu_instr *instr,
|
||||||
fs_reg result, fs_reg *op);
|
fs_reg result, fs_reg *op, unsigned fsign_src);
|
||||||
bool opt_peephole_sel();
|
bool opt_peephole_sel();
|
||||||
bool opt_peephole_csel();
|
bool opt_peephole_csel();
|
||||||
bool opt_peephole_predicated_break();
|
bool opt_peephole_predicated_break();
|
||||||
|
|
|
||||||
|
|
@ -802,26 +802,72 @@ fs_visitor::try_emit_b2fi_of_inot(const fs_builder &bld,
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Emit code for nir_op_fsign
|
* Emit code for nir_op_fsign possibly fused with a nir_op_fmul
|
||||||
|
*
|
||||||
|
* If \c instr is not the \c nir_op_fsign, then \c fsign_src is the index of
|
||||||
|
* the source of \c instr that is a \c nir_op_fsign.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
fs_visitor::emit_fsign(const fs_builder &bld, const nir_alu_instr *instr,
|
fs_visitor::emit_fsign(const fs_builder &bld, const nir_alu_instr *instr,
|
||||||
fs_reg result, fs_reg *op)
|
fs_reg result, fs_reg *op, unsigned fsign_src)
|
||||||
{
|
{
|
||||||
fs_inst *inst;
|
fs_inst *inst;
|
||||||
|
|
||||||
assert(instr->op == nir_op_fsign);
|
assert(instr->op == nir_op_fsign || instr->op == nir_op_fmul);
|
||||||
|
assert(fsign_src < nir_op_infos[instr->op].num_inputs);
|
||||||
|
|
||||||
|
if (instr->op != nir_op_fsign) {
|
||||||
|
const nir_alu_instr *const fsign_instr =
|
||||||
|
nir_src_as_alu_instr(instr->src[fsign_src].src);
|
||||||
|
|
||||||
|
assert(!fsign_instr->dest.saturate);
|
||||||
|
|
||||||
|
/* op[fsign_src] has the nominal result of the fsign, and op[1 -
|
||||||
|
* fsign_src] has the other multiply source. This must be rearranged so
|
||||||
|
* that op[0] is the source of the fsign op[1] is the other multiply
|
||||||
|
* source.
|
||||||
|
*/
|
||||||
|
if (fsign_src != 0)
|
||||||
|
op[1] = op[0];
|
||||||
|
|
||||||
|
op[0] = get_nir_src(fsign_instr->src[0].src);
|
||||||
|
|
||||||
|
const nir_alu_type t =
|
||||||
|
(nir_alu_type)(nir_op_infos[instr->op].input_types[0] |
|
||||||
|
nir_src_bit_size(fsign_instr->src[0].src));
|
||||||
|
|
||||||
|
op[0].type = brw_type_for_nir_type(devinfo, t);
|
||||||
|
op[0].abs = fsign_instr->src[0].abs;
|
||||||
|
op[0].negate = fsign_instr->src[0].negate;
|
||||||
|
|
||||||
|
unsigned channel = 0;
|
||||||
|
if (nir_op_infos[instr->op].output_size == 0) {
|
||||||
|
/* Since NIR is doing the scalarizing for us, we should only ever see
|
||||||
|
* vectorized operations with a single channel.
|
||||||
|
*/
|
||||||
|
assert(util_bitcount(instr->dest.write_mask) == 1);
|
||||||
|
channel = ffs(instr->dest.write_mask) - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
op[0] = offset(op[0], bld, fsign_instr->src[0].swizzle[channel]);
|
||||||
|
} else {
|
||||||
assert(!instr->dest.saturate);
|
assert(!instr->dest.saturate);
|
||||||
|
}
|
||||||
|
|
||||||
if (op[0].abs) {
|
if (op[0].abs) {
|
||||||
/* Straightforward since the source can be assumed to be either strictly
|
/* Straightforward since the source can be assumed to be either strictly
|
||||||
* >= 0 or strictly <= 0 depending on the setting of the negate flag.
|
* >= 0 or strictly <= 0 depending on the setting of the negate flag.
|
||||||
*/
|
*/
|
||||||
set_condmod(BRW_CONDITIONAL_NZ, bld.MOV(result, op[0]));
|
set_condmod(BRW_CONDITIONAL_NZ, bld.MOV(result, op[0]));
|
||||||
|
|
||||||
|
if (instr->op == nir_op_fsign) {
|
||||||
inst = (op[0].negate)
|
inst = (op[0].negate)
|
||||||
? bld.MOV(result, brw_imm_f(-1.0f))
|
? bld.MOV(result, brw_imm_f(-1.0f))
|
||||||
: bld.MOV(result, brw_imm_f(1.0f));
|
: bld.MOV(result, brw_imm_f(1.0f));
|
||||||
|
} else {
|
||||||
|
op[1].negate = (op[0].negate != op[1].negate);
|
||||||
|
inst = bld.MOV(result, op[1]);
|
||||||
|
}
|
||||||
|
|
||||||
set_predicate(BRW_PREDICATE_NORMAL, inst);
|
set_predicate(BRW_PREDICATE_NORMAL, inst);
|
||||||
} else if (type_sz(op[0].type) < 8) {
|
} else if (type_sz(op[0].type) < 8) {
|
||||||
|
|
@ -837,7 +883,14 @@ fs_visitor::emit_fsign(const fs_builder &bld, const nir_alu_instr *instr,
|
||||||
result.type = BRW_REGISTER_TYPE_UD;
|
result.type = BRW_REGISTER_TYPE_UD;
|
||||||
bld.AND(result_int, op[0], brw_imm_ud(0x80000000u));
|
bld.AND(result_int, op[0], brw_imm_ud(0x80000000u));
|
||||||
|
|
||||||
|
if (instr->op == nir_op_fsign)
|
||||||
inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f800000u));
|
inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f800000u));
|
||||||
|
else {
|
||||||
|
/* Use XOR here to get the result sign correct. */
|
||||||
|
inst = bld.XOR(result_int, result_int,
|
||||||
|
retype(op[1], BRW_REGISTER_TYPE_UD));
|
||||||
|
}
|
||||||
|
|
||||||
inst->predicate = BRW_PREDICATE_NORMAL;
|
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||||
} else {
|
} else {
|
||||||
/* For doubles we do the same but we need to consider:
|
/* For doubles we do the same but we need to consider:
|
||||||
|
|
@ -857,8 +910,20 @@ fs_visitor::emit_fsign(const fs_builder &bld, const nir_alu_instr *instr,
|
||||||
bld.AND(r, subscript(op[0], BRW_REGISTER_TYPE_UD, 1),
|
bld.AND(r, subscript(op[0], BRW_REGISTER_TYPE_UD, 1),
|
||||||
brw_imm_ud(0x80000000u));
|
brw_imm_ud(0x80000000u));
|
||||||
|
|
||||||
|
if (instr->op == nir_op_fsign) {
|
||||||
set_predicate(BRW_PREDICATE_NORMAL,
|
set_predicate(BRW_PREDICATE_NORMAL,
|
||||||
bld.OR(r, r, brw_imm_ud(0x3ff00000u)));
|
bld.OR(r, r, brw_imm_ud(0x3ff00000u)));
|
||||||
|
} else {
|
||||||
|
/* This could be done better in some cases. If the scale is an
|
||||||
|
* immediate with the low 32-bits all 0, emitting a separate XOR and
|
||||||
|
* OR would allow an algebraic optimization to remove the OR. There
|
||||||
|
* are currently zero instances of fsign(double(x))*IMM in shader-db
|
||||||
|
* or any test suite, so it is hard to care at this time.
|
||||||
|
*/
|
||||||
|
fs_reg result_int64 = retype(result, BRW_REGISTER_TYPE_UQ);
|
||||||
|
inst = bld.XOR(result_int64, result_int64,
|
||||||
|
retype(op[1], BRW_REGISTER_TYPE_UQ));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -994,7 +1059,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_fsign:
|
case nir_op_fsign:
|
||||||
emit_fsign(bld, instr, result, op);
|
emit_fsign(bld, instr, result, op, 0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_op_frcp:
|
case nir_op_frcp:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue