From b1b5a0c6ad04a2f69d980a416be4883e02a44142 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Fri, 22 Sep 2023 09:00:49 +0200 Subject: [PATCH] aco/gfx11.5: select SALU fsat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 6 ++++++ src/amd/compiler/aco_instruction_selection_setup.cpp | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 8db0148a020..f5faa7fb353 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -2651,6 +2651,12 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) Instruction* add = bld.vop3(aco_opcode::v_add_f64_e64, Definition(dst), src, Operand::zero()); add->valu().clamp = true; + } else if (dst.regClass() == s1 && instr->def.bit_size == 16) { + Temp low = bld.sop2(aco_opcode::s_max_f16, bld.def(s1), src, Operand::c16(0)); + bld.sop2(aco_opcode::s_min_f16, Definition(dst), low, Operand::c16(0x3C00)); + } else if (dst.regClass() == s1 && instr->def.bit_size == 32) { + Temp low = bld.sop2(aco_opcode::s_max_f32, bld.def(s1), src, Operand::c32(0)); + bld.sop2(aco_opcode::s_min_f32, Definition(dst), low, Operand::c32(0x3f800000)); } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); } diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index c1b358d9a97..f26778aed77 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -332,7 +332,6 @@ init_context(isel_context* ctx, nir_shader* shader) case nir_op_ffmaz: case nir_op_fneg: case nir_op_fabs: - case nir_op_fsat: case nir_op_fsign: case nir_op_frcp: case nir_op_frsq: @@ -377,6 +376,7 @@ init_context(isel_context* ctx, nir_shader* shader) case nir_op_fsub: case nir_op_fmax: case nir_op_fmin: + case nir_op_fsat: case nir_op_i2f16: case nir_op_i2f32: case nir_op_u2f16: