mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 02:38:04 +02:00
aco: improve fsign selection
Idea from https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6284 fossil-db (Navi): Totals from 4053 (2.95% of 137413) affected shaders: SGPRs: 305810 -> 305906 (+0.03%); split: -0.01%, +0.04% VGPRs: 249000 -> 249144 (+0.06%); split: -0.01%, +0.07% CodeSize: 29967092 -> 29885768 (-0.27%); split: -0.27%, +0.00% Instrs: 5749494 -> 5737971 (-0.20%); split: -0.20%, +0.00% Cycles: 255028584 -> 254955444 (-0.03%); split: -0.04%, +0.01% Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6583>
This commit is contained in:
parent
0e9425a1b7
commit
6049dc1a9d
1 changed files with 8 additions and 10 deletions
|
|
@ -2127,17 +2127,15 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
|||
case nir_op_fsign: {
|
||||
Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0]));
|
||||
if (dst.regClass() == v2b) {
|
||||
Temp one = bld.copy(bld.def(v1), Operand(0x3c00u));
|
||||
Temp minus_one = bld.copy(bld.def(v1), Operand(0xbc00u));
|
||||
Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f16, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
|
||||
src = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), one, src, cond);
|
||||
cond = bld.vopc(aco_opcode::v_cmp_le_f16, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
|
||||
bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), minus_one, src, cond);
|
||||
assert(ctx->program->chip_class >= GFX9);
|
||||
/* replace negative zero with positive zero */
|
||||
src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand(0u), src);
|
||||
src = bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand((uint16_t)-1), src, Operand((uint16_t)1u));
|
||||
bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src);
|
||||
} else if (dst.regClass() == v1) {
|
||||
Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f32, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
|
||||
src = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0x3f800000u), src, cond);
|
||||
cond = bld.vopc(aco_opcode::v_cmp_le_f32, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
|
||||
bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), Operand(0xbf800000u), src, cond);
|
||||
src = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand(0u), src);
|
||||
src = bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand((uint32_t)-1), src, Operand(1u));
|
||||
bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src);
|
||||
} else if (dst.regClass() == v2) {
|
||||
Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
|
||||
Temp tmp = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(0x3FF00000u));
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue