mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 19:30:11 +01:00
aco: fix combine_constant_comparison_ordering() NaN check with 16/64-bit
No fossil-db changes. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Cc: mesa-stable Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7045>
This commit is contained in:
parent
d4c821da0e
commit
91ffeed88a
2 changed files with 96 additions and 16 deletions
|
|
@ -1804,6 +1804,31 @@ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Reports whether `op` has a known constant value for the given bit size.
 *
 * ctx      - optimizer context; its per-temporary info table is consulted.
 * op       - operand to inspect.
 * bit_size - bit size used when querying/reading a temporary's constant info.
 * value    - out-parameter; written only when the function returns true.
 *
 * Returns true and stores the 64-bit constant in *value when `op` is a
 * constant operand, or a temporary whose recorded info is a constant or
 * literal of `bit_size` bits. Returns false otherwise, leaving *value
 * untouched.
 */
bool is_operand_constant(opt_ctx &ctx, Operand op, unsigned bit_size, uint64_t *value)
{
   /* A constant operand carries its value directly. */
   if (op.isConstant()) {
      *value = op.constantValue64();
      return true;
   }

   /* Neither a constant nor a temporary: nothing is known about it. */
   if (!op.isTemp())
      return false;

   /* For a temporary, look at the info recorded under the id that
    * original_temp_id() resolves it to. */
   const unsigned temp_id = original_temp_id(ctx, op.getTemp());
   if (ctx.info[temp_id].is_constant_or_literal(bit_size)) {
      *value = get_constant_op(ctx, ctx.info[temp_id], bit_size).constantValue64();
      return true;
   }

   return false;
}
|
||||
|
||||
/* Tests whether the raw bit pattern `value` encodes a NaN when interpreted
 * as a floating-point number of `bit_size` bits.
 *
 * A pattern is a NaN when its exponent field is all ones and its mantissa
 * field is non-zero. Field widths used:
 *   16-bit: 5 exponent bits, 10 mantissa bits
 *   32-bit: 8 exponent bits, 23 mantissa bits
 *   any other bit_size: 11 exponent bits, 52 mantissa bits (64-bit layout)
 *
 * Bits above the exponent field (sign bit and anything higher) are ignored.
 */
bool is_constant_nan(uint64_t value, unsigned bit_size)
{
   unsigned exponent_bits;
   unsigned mantissa_bits;

   switch (bit_size) {
   case 16:
      exponent_bits = 5;
      mantissa_bits = 10;
      break;
   case 32:
      exponent_bits = 8;
      mantissa_bits = 23;
      break;
   default:
      /* Every other size is handled with the 64-bit layout. */
      exponent_bits = 11;
      mantissa_bits = 52;
      break;
   }

   const uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
   const uint64_t mantissa_mask = (uint64_t{1} << mantissa_bits) - 1;

   const bool exponent_all_ones = ((value >> mantissa_bits) & exponent_mask) == exponent_mask;
   const bool mantissa_nonzero = (value & mantissa_mask) != 0;

   return exponent_all_ones && mantissa_nonzero;
}
|
||||
|
||||
/* s_or_b64(v_cmp_neq_f32(a, a), cmp(a, #b)) and b is not NaN -> get_unordered(cmp)(a, b)
|
||||
* s_and_b64(v_cmp_eq_f32(a, a), cmp(a, #b)) and b is not NaN -> get_ordered(cmp)(a, b) */
|
||||
bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
||||
|
|
@ -1829,7 +1854,8 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
|
|||
else if (get_f32_cmp(nan_test->opcode) != expected_nan_test)
|
||||
return false;
|
||||
|
||||
if (!is_cmp(cmp->opcode) || get_cmp_bitsize(cmp->opcode) != get_cmp_bitsize(nan_test->opcode))
|
||||
unsigned bit_size = get_cmp_bitsize(cmp->opcode);
|
||||
if (!is_cmp(cmp->opcode) || get_cmp_bitsize(nan_test->opcode) != bit_size)
|
||||
return false;
|
||||
|
||||
if (!nan_test->operands[0].isTemp() || !nan_test->operands[1].isTemp())
|
||||
|
|
@ -1858,22 +1884,10 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
|
|||
if (constant_operand == -1)
|
||||
return false;
|
||||
|
||||
uint32_t constant;
|
||||
if (cmp->operands[constant_operand].isConstant()) {
|
||||
constant = cmp->operands[constant_operand].constantValue();
|
||||
} else if (cmp->operands[constant_operand].isTemp()) {
|
||||
Temp tmp = cmp->operands[constant_operand].getTemp();
|
||||
unsigned id = original_temp_id(ctx, tmp);
|
||||
if (!ctx.info[id].is_constant_or_literal(32))
|
||||
return false;
|
||||
constant = ctx.info[id].val;
|
||||
} else {
|
||||
uint64_t constant_value;
|
||||
if (!is_operand_constant(ctx, cmp->operands[constant_operand], bit_size, &constant_value))
|
||||
return false;
|
||||
}
|
||||
|
||||
float constantf;
|
||||
memcpy(&constantf, &constant, 4);
|
||||
if (isnan(constantf))
|
||||
if (is_constant_nan(constant_value, bit_size))
|
||||
return false;
|
||||
|
||||
if (cmp->operands[0].isTemp())
|
||||
|
|
|
|||
|
|
@ -299,3 +299,69 @@ BEGIN_TEST(optimize.clamp)
|
|||
|
||||
finish_opt_test();
|
||||
END_TEST
|
||||
|
||||
/* Exercises folding of a self-comparison NaN test (v_cmp_neq/v_cmp_eq of %a
 * with itself) that is OR'd/AND'd with a comparison of %a against a constant.
 * The `//!` and `//>>` lines are presumably the expected-output patterns that
 * the test framework checks; they must stay exactly as written. */
BEGIN_TEST(optimize.const_comparison_ordering)
|
||||
//>> v1: %a, v1: %b, v2: %c, v1: %d, s2: %_:exec = p_startpgm
|
||||
if (!setup_cs("v1 v1 v2 v1", GFX9))
|
||||
return;
|
||||
|
||||
/* optimize to unordered comparison: s_or_b64 of v_cmp_neq_f32(a, a) with a
 * comparison against a non-NaN constant is expected to become one
 * v_cmp_nge_f32. 0x40800000 is 4.0f and 0x40a00000 is 5.0f. */
|
||||
//! s2: %res0 = v_cmp_nge_f32 4.0, %a
|
||||
//! p_unit_test 0, %res0
|
||||
writeout(0, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc),
|
||||
bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
|
||||
bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0])));
|
||||
|
||||
/* Same as case 0, but the input comparison is already v_cmp_nge_f32. */
//! s2: %res1 = v_cmp_nge_f32 4.0, %a
|
||||
//! p_unit_test 1, %res1
|
||||
writeout(1, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc),
|
||||
bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
|
||||
bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0])));
|
||||
|
||||
/* Here the constant reaches the comparison through a copied v1 temporary
 * rather than as a direct constant operand; the expected result uses the
 * literal directly. */
//! s2: %res2 = v_cmp_nge_f32 0x40a00000, %a
|
||||
//! p_unit_test 2, %res2
|
||||
writeout(2, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc),
|
||||
bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
|
||||
bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), bld.copy(bld.def(v1), Operand(0x40a00000u)), inputs[0])));
|
||||
|
||||
/* optimize to ordered comparison: s_and_b64 of v_cmp_eq_f32(a, a) with a
 * comparison against a non-NaN constant is expected to become one
 * v_cmp_lt_f32. */
|
||||
//! s2: %res3 = v_cmp_lt_f32 4.0, %a
|
||||
//! p_unit_test 3, %res3
|
||||
writeout(3, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc),
|
||||
bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
|
||||
bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0])));
|
||||
|
||||
/* Same as case 3, but the input comparison is already v_cmp_lt_f32. */
//! s2: %res4 = v_cmp_lt_f32 4.0, %a
|
||||
//! p_unit_test 4, %res4
|
||||
writeout(4, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc),
|
||||
bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
|
||||
bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0])));
|
||||
|
||||
/* Constant supplied through a copied v1 temporary, as in case 2. */
//! s2: %res5 = v_cmp_lt_f32 0x40a00000, %a
|
||||
//! p_unit_test 5, %res5
|
||||
writeout(5, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc),
|
||||
bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
|
||||
bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), bld.copy(bld.def(v1), Operand(0x40a00000u)), inputs[0])));
|
||||
|
||||
/* NaN constants: 0x7e00 (16-bit) and 0x7fc00000 (32-bit) both have an
 * all-ones exponent and a non-zero mantissa. The expected output keeps all
 * three instructions, i.e. no combining takes place. */
|
||||
uint16_t nan16 = 0x7e00;
|
||||
uint32_t nan32 = 0x7fc00000;
|
||||
|
||||
/* 16-bit comparison against a NaN constant. */
//! s2: %tmp6_0 = v_cmp_lt_f16 0x7e00, %a
|
||||
//! s2: %tmp6_1 = v_cmp_neq_f16 %a, %a
|
||||
//! s2: %res6, s1: %_:scc = s_or_b64 %tmp6_1, %tmp6_0
|
||||
//! p_unit_test 6, %res6
|
||||
writeout(6, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc),
|
||||
bld.vopc(aco_opcode::v_cmp_neq_f16, bld.def(bld.lm), inputs[0], inputs[0]),
|
||||
bld.vopc(aco_opcode::v_cmp_lt_f16, bld.def(bld.lm), Operand(nan16), inputs[0])));
|
||||
|
||||
/* 32-bit comparison against a NaN constant. */
//! s2: %tmp7_0 = v_cmp_lt_f32 0x7fc00000, %a
|
||||
//! s2: %tmp7_1 = v_cmp_neq_f32 %a, %a
|
||||
//! s2: %res7, s1: %_:scc = s_or_b64 %tmp7_1, %tmp7_0
|
||||
//! p_unit_test 7, %res7
|
||||
writeout(7, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc),
|
||||
bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]),
|
||||
bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(nan32), inputs[0])));
|
||||
|
||||
finish_opt_test();
|
||||
END_TEST
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue