diff --git a/.pick_status.json b/.pick_status.json index 8b9eebe3774..c3c3cc5c4d7 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -3174,7 +3174,7 @@ "description": "aco: remove SMEM_instruction::prevent_overflow", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 8f9b43c6a8b..edf270972d7 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -6020,7 +6020,7 @@ visit_load_push_constant(isel_context* ctx, nir_intrinsic_instr* instr) default: unreachable("unimplemented or forbidden load_push_constant."); } - bld.smem(op, Definition(vec), ptr, index)->smem().prevent_overflow = true; + bld.smem(op, Definition(vec), ptr, index); if (!aligned) { Operand byte_offset = index_cv ? Operand::c32((offset + index_cv->u32) % 4) : Operand(index); diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index baf64b04267..a79d0bd2cf9 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1428,8 +1428,7 @@ struct SMEM_instruction : public Instruction { bool dlc : 1; /* NAVI: device level coherent */ bool nv : 1; /* VEGA only: Non-volatile */ bool disable_wqm : 1; - bool prevent_overflow : 1; /* avoid overflow when combining additions */ - uint8_t padding : 3; + uint8_t padding : 4; }; static_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp index db977fe486b..1a8e296f29a 100644 --- a/src/amd/compiler/aco_opt_value_numbering.cpp +++ b/src/amd/compiler/aco_opt_value_numbering.cpp @@ -218,10 +218,8 @@ struct InstrPred { case Format::SMEM: { SMEM_instruction& aS = a->smem(); SMEM_instruction& bS = b->smem(); - /* isel shouldn't be creating situations where this assertion fails */ - assert(aS.prevent_overflow == bS.prevent_overflow); return aS.sync == bS.sync && aS.glc == bS.glc && aS.dlc == bS.dlc && aS.nv == bS.nv && - aS.disable_wqm == bS.disable_wqm && aS.prevent_overflow == bS.prevent_overflow; + aS.disable_wqm == bS.disable_wqm; } case Format::VINTRP: { VINTRP_instruction& aI = a->vintrp(); diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 31129246a97..d5b199216f5 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -879,13 +879,12 @@ smem_combine(opt_ctx& ctx, aco_ptr& instr) Temp base; uint32_t offset; - bool prevent_overflow = smem.operands[0].size() > 2 || smem.prevent_overflow; if (info.is_constant_or_literal(32) && ((ctx.program->gfx_level == GFX6 && info.val <= 0x3FF) || (ctx.program->gfx_level == GFX7 && info.val <= 0xFFFFFFFF) || (ctx.program->gfx_level >= GFX8 && info.val <= 0xFFFFF))) { instr->operands[1] = Operand::c32(info.val); - } else if (parse_base_offset(ctx, instr.get(), 1, &base, &offset, prevent_overflow) && + } else if (parse_base_offset(ctx, instr.get(), 1, &base, &offset, true) && base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->gfx_level >= GFX9 && offset % 4u == 0) { bool soe = smem.operands.size() >= (!smem.definitions.empty() ? 3 : 4);