diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 05abb20e4ee..99be3539ee9 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -1902,7 +1902,20 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) } case nir_op_iadd_sat: { Temp src0 = get_alu_src(ctx, instr->src[0]); - Temp src1 = as_vgpr(ctx, get_alu_src(ctx, instr->src[1])); + Temp src1 = get_alu_src(ctx, instr->src[1]); + if (dst.regClass() == s1) { + Temp cond = bld.sopc(aco_opcode::s_cmp_lt_i32, bld.def(s1, scc), src1, Operand::zero()); + Temp bound = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(bld.def(s1, scc)), + Operand::c32(INT32_MAX), cond); + Temp overflow = bld.tmp(s1); + Temp add = + bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.scc(Definition(overflow)), src0, src1); + bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), bound, add, bld.scc(overflow)); + break; + } + + src1 = as_vgpr(ctx, src1); + if (dst.regClass() == v2b) { Instruction* add_instr = bld.vop3(aco_opcode::v_add_i16, Definition(dst), src0, src1).instr; diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 5e1b068d697..6d0efade16b 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -534,7 +534,6 @@ init_context(isel_context* ctx, nir_shader* shader) case nir_op_cube_face_index_amd: case nir_op_cube_face_coord_amd: case nir_op_sad_u8x4: - case nir_op_iadd_sat: case nir_op_udot_4x8_uadd: case nir_op_sdot_4x8_iadd: case nir_op_udot_4x8_uadd_sat: