From 20b252c4c8ba23b3bb2d6cd0a4db7930b74ddb85 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Tue, 18 Apr 2023 14:50:18 +0100 Subject: [PATCH] aco: remove SMEM_instruction::prevent_overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This doesn't seem useful anymore, and it seems we forgot to set it in a few places. This commit changes the behaviour of the optimizer so that prevent_overflow is always true. fossil-db (navi21): Totals from 7421 (5.47% of 135636) affected shaders: Instrs: 5402823 -> 5440126 (+0.69%); split: -0.00%, +0.69% CodeSize: 28731300 -> 28974152 (+0.85%); split: -0.00%, +0.85% VGPRs: 317528 -> 317552 (+0.01%) SpillSGPRs: 419 -> 415 (-0.95%) Latency: 40712478 -> 40783115 (+0.17%); split: -0.01%, +0.19% InvThroughput: 7612708 -> 7616751 (+0.05%); split: -0.00%, +0.06% VClause: 123824 -> 123848 (+0.02%); split: -0.09%, +0.11% SClause: 161915 -> 172741 (+6.69%); split: -0.03%, +6.71% Copies: 393015 -> 394429 (+0.36%); split: -0.20%, +0.56% PreSGPRs: 288658 -> 289603 (+0.33%); split: -0.04%, +0.36% Signed-off-by: Rhys Perry Reviewed-by: Qiang Yu Reviewed-by: Timur Kristóf Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8864 Cc: mesa-stable Part-of: (cherry picked from commit 1a6095b36e9a5959abfe751a86377ecff745453a) --- .pick_status.json | 2 +- src/amd/compiler/aco_instruction_selection.cpp | 2 +- src/amd/compiler/aco_ir.h | 3 +-- src/amd/compiler/aco_opt_value_numbering.cpp | 4 +--- src/amd/compiler/aco_optimizer.cpp | 3 +-- 5 files changed, 5 insertions(+), 9 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 8b9eebe3774..c3c3cc5c4d7 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -3174,7 +3174,7 @@ "description": "aco: remove SMEM_instruction::prevent_overflow", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/compiler/aco_instruction_selection.cpp 
b/src/amd/compiler/aco_instruction_selection.cpp index 8f9b43c6a8b..edf270972d7 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -6020,7 +6020,7 @@ visit_load_push_constant(isel_context* ctx, nir_intrinsic_instr* instr) default: unreachable("unimplemented or forbidden load_push_constant."); } - bld.smem(op, Definition(vec), ptr, index)->smem().prevent_overflow = true; + bld.smem(op, Definition(vec), ptr, index); if (!aligned) { Operand byte_offset = index_cv ? Operand::c32((offset + index_cv->u32) % 4) : Operand(index); diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index baf64b04267..a79d0bd2cf9 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1428,8 +1428,7 @@ struct SMEM_instruction : public Instruction { bool dlc : 1; /* NAVI: device level coherent */ bool nv : 1; /* VEGA only: Non-volatile */ bool disable_wqm : 1; - bool prevent_overflow : 1; /* avoid overflow when combining additions */ - uint8_t padding : 3; + uint8_t padding : 4; }; static_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp index db977fe486b..1a8e296f29a 100644 --- a/src/amd/compiler/aco_opt_value_numbering.cpp +++ b/src/amd/compiler/aco_opt_value_numbering.cpp @@ -218,10 +218,8 @@ struct InstrPred { case Format::SMEM: { SMEM_instruction& aS = a->smem(); SMEM_instruction& bS = b->smem(); - /* isel shouldn't be creating situations where this assertion fails */ - assert(aS.prevent_overflow == bS.prevent_overflow); return aS.sync == bS.sync && aS.glc == bS.glc && aS.dlc == bS.dlc && aS.nv == bS.nv && - aS.disable_wqm == bS.disable_wqm && aS.prevent_overflow == bS.prevent_overflow; + aS.disable_wqm == bS.disable_wqm; } case Format::VINTRP: { VINTRP_instruction& aI = a->vintrp(); diff --git a/src/amd/compiler/aco_optimizer.cpp 
b/src/amd/compiler/aco_optimizer.cpp index 31129246a97..d5b199216f5 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -879,13 +879,12 @@ smem_combine(opt_ctx& ctx, aco_ptr<Instruction>& instr) Temp base; uint32_t offset; - bool prevent_overflow = smem.operands[0].size() > 2 || smem.prevent_overflow; if (info.is_constant_or_literal(32) && ((ctx.program->gfx_level == GFX6 && info.val <= 0x3FF) || (ctx.program->gfx_level == GFX7 && info.val <= 0xFFFFFFFF) || (ctx.program->gfx_level >= GFX8 && info.val <= 0xFFFFF))) { instr->operands[1] = Operand::c32(info.val); - } else if (parse_base_offset(ctx, instr.get(), 1, &base, &offset, prevent_overflow) && + } else if (parse_base_offset(ctx, instr.get(), 1, &base, &offset, true) && base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->gfx_level >= GFX9 && offset % 4u == 0) { bool soe = smem.operands.size() >= (!smem.definitions.empty() ? 3 : 4);