From 64cae5c48d1296dfde07d5cd6cc5e9dbe69fbc84 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Tue, 21 Jan 2025 01:12:58 +0100
Subject: [PATCH] aco: form mixed MTBUF/MUBUF clauses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This should be one clause (all of the instructions load from the same vertex buffer)

s_clause 0x2 ; bfa10002
tbuffer_load_format_xyzw v[8:11], v5, s[4:7], 0 format:[BUF_FMT_8_8_8_8_UNORM] idxen offset:36 ; e9c32024 80010805
tbuffer_load_format_xyzw v[12:15], v5, s[4:7], 0 format:[BUF_FMT_8_8_8_8_UNORM] idxen offset:16 ; e9c32010 80010c05
tbuffer_load_format_xyzw v[16:19], v5, s[4:7], 0 format:[BUF_FMT_8_8_8_8_UNORM] idxen offset:12 ; e9c3200c 80011005
s_clause 0x2 ; bfa10002
buffer_load_dwordx3 v[20:22], v5, s[4:7], 0 idxen ; e03c2000 80011405
buffer_load_dwordx3 v[23:25], v5, s[4:7], 0 idxen offset:20 ; e03c2014 80011705
buffer_load_dwordx4 v[28:31], v5, s[4:7], 0 idxen offset:48 ; e0382030 80011c05
tbuffer_load_format_xy v[0:1], v5, s[4:7], 0 format:[BUF_FMT_8_8_UNORM] idxen offset:32 ; e8712020 80010005

Foz-DB Navi21:
Totals from 5624 (7.08% of 79395) affected shaders:
MaxWaves: 149894 -> 149898 (+0.00%)
Instrs: 3032697 -> 3034853 (+0.07%); split: -0.05%, +0.12%
CodeSize: 15907852 -> 15915752 (+0.05%); split: -0.05%, +0.10%
VGPRs: 216248 -> 216144 (-0.05%)
Latency: 10955137 -> 11008760 (+0.49%); split: -0.22%, +0.70%
InvThroughput: 2032857 -> 2033916 (+0.05%); split: -0.03%, +0.08%
VClause: 50120 -> 41778 (-16.64%); split: -16.66%, +0.02%
SClause: 62034 -> 62004 (-0.05%); split: -0.33%, +0.29%
Copies: 253836 -> 254505 (+0.26%); split: -0.17%, +0.43%
VALU: 1621606 -> 1622274 (+0.04%); split: -0.03%, +0.07%
SALU: 653251 -> 653252 (+0.00%)

Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_ir.cpp                 |  5 ++++-
 src/amd/compiler/tests/test_hard_clause.cpp | 12 +++++++++---
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index a678d05100e..fd7da14aa44 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -1380,8 +1380,11 @@ should_form_clause(const Instruction* a, const Instruction* b) if (a->definitions.empty() != b->definitions.empty()) return false; - if (a->format != b->format) + /* MUBUF and MTBUF can appear in the same clause. */ + if ((a->isMTBUF() && b->isMUBUF()) || (a->isMUBUF() && b->isMTBUF())) { + } else if (a->format != b->format) { return false; + } if (a->operands.empty() || b->operands.empty()) return false; diff --git a/src/amd/compiler/tests/test_hard_clause.cpp b/src/amd/compiler/tests/test_hard_clause.cpp index 82cc3fd97c4..50bf52ea889 100644 --- a/src/amd/compiler/tests/test_hard_clause.cpp +++ b/src/amd/compiler/tests/test_hard_clause.cpp @@ -282,16 +282,22 @@ BEGIN_TEST(form_hard_clauses.heuristic) create_smem_buffer(buf_desc0); create_smem(); - /* Only form clause between MUBUF and MTBUF if they load from the same binding. Ignore descriptor - * if they're te same binding. - */ + /* Form clause with MTBUF/MUBUF mix if they use the same descriptor. */ //>> p_unit_test 7 + //! s_clause imm:1 //; search_re('buffer_load_dword') //; search_re('tbuffer_load_format_x') bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u)); create_mubuf(buf_desc0); create_mtbuf(buf_desc0); + //>> p_unit_test 8 + //; search_re('buffer_load_dword') + //; search_re('tbuffer_load_format_x') + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u)); + create_mubuf(buf_desc0); + create_mtbuf(buf_desc1); + finish_form_hard_clause_test(); END_TEST