mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-01 01:00:10 +01:00
aco: don't combine mul+add_clamp to mad_clamp
The result is not the same if the multiplication overflows: mad_clamp does not truncate between the mul and the add.

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28421>
This commit is contained in:
parent
db29984c25
commit
51a5ebbd01
1 changed file with 9 additions and 6 deletions
|
|
@@ -3849,6 +3849,8 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
bool fadd = instr->opcode == aco_opcode::v_pk_add_f16;
|
||||
if (fadd && instr->definitions[0].isPrecise())
|
||||
return;
|
||||
if (!fadd && instr->valu().clamp)
|
||||
return;
|
||||
|
||||
Instruction* mul_instr = nullptr;
|
||||
unsigned add_op_idx = 0;
|
||||
|
|
@@ -4502,22 +4504,22 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
}
|
||||
} else if (instr->opcode == aco_opcode::v_not_b32 && ctx.program->gfx_level >= GFX10) {
|
||||
combine_not_xor(ctx, instr);
|
||||
} else if (instr->opcode == aco_opcode::v_add_u16) {
|
||||
} else if (instr->opcode == aco_opcode::v_add_u16 && !instr->valu().clamp) {
|
||||
combine_three_valu_op(
|
||||
ctx, instr, aco_opcode::v_mul_lo_u16,
|
||||
ctx.program->gfx_level == GFX8 ? aco_opcode::v_mad_legacy_u16 : aco_opcode::v_mad_u16,
|
||||
"120", 1 | 2);
|
||||
} else if (instr->opcode == aco_opcode::v_add_u16_e64) {
|
||||
} else if (instr->opcode == aco_opcode::v_add_u16_e64 && !instr->valu().clamp) {
|
||||
combine_three_valu_op(ctx, instr, aco_opcode::v_mul_lo_u16_e64, aco_opcode::v_mad_u16, "120",
|
||||
1 | 2);
|
||||
} else if (instr->opcode == aco_opcode::v_add_u32) {
|
||||
} else if (instr->opcode == aco_opcode::v_add_u32 && !instr->usesModifiers()) {
|
||||
if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) {
|
||||
} else if (combine_add_bcnt(ctx, instr)) {
|
||||
} else if (combine_three_valu_op(ctx, instr, aco_opcode::v_mul_u32_u24,
|
||||
aco_opcode::v_mad_u32_u24, "120", 1 | 2)) {
|
||||
} else if (combine_three_valu_op(ctx, instr, aco_opcode::v_mul_i32_i24,
|
||||
aco_opcode::v_mad_i32_i24, "120", 1 | 2)) {
|
||||
} else if (ctx.program->gfx_level >= GFX9 && !instr->usesModifiers()) {
|
||||
} else if (ctx.program->gfx_level >= GFX9) {
|
||||
if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120",
|
||||
1 | 2)) {
|
||||
} else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32,
|
||||
|
|
@@ -4531,8 +4533,9 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
} else if (combine_add_or_then_and_lshl(ctx, instr)) {
|
||||
}
|
||||
}
|
||||
} else if (instr->opcode == aco_opcode::v_add_co_u32 ||
|
||||
instr->opcode == aco_opcode::v_add_co_u32_e64) {
|
||||
} else if ((instr->opcode == aco_opcode::v_add_co_u32 ||
|
||||
instr->opcode == aco_opcode::v_add_co_u32_e64) &&
|
||||
!instr->usesModifiers()) {
|
||||
bool carry_out = ctx.uses[instr->definitions[1].tempId()] > 0;
|
||||
if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) {
|
||||
} else if (!carry_out && combine_add_bcnt(ctx, instr)) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue