From c81fe5b235c0204e9f77d13411004a3307aa6301 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Tue, 23 Aug 2022 15:46:36 +0200 Subject: [PATCH] r600/sfn: Use a heuristic to keep SSBO setup and store close When SSBO instructions use constant address values the address loading is immediately ready; scheduling the address loads early increases the register pressure, so force a new instruction block to work around this problem. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6975 Fixes: 79ca456b4837b3bc21cf9ef3c03c505c4b4909f6 r600/sfn: rewrite NIR backend v2: do handling in shader block to be thread safe (hinted to by Filip) Signed-off-by: Gert Wollny Reviewed-by: Filip Gawin (v1) Part-of: --- src/gallium/drivers/r600/sfn/sfn_instr.h | 3 +++ src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp | 6 ------ src/gallium/drivers/r600/sfn/sfn_instr_mem.h | 1 - src/gallium/drivers/r600/sfn/sfn_shader.cpp | 3 +++ 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/r600/sfn/sfn_instr.h b/src/gallium/drivers/r600/sfn/sfn_instr.h index a715305d7cf..d3beb8f40cd 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr.h +++ b/src/gallium/drivers/r600/sfn/sfn_instr.h @@ -210,6 +210,8 @@ public: bool kcache_reservation_failed() const { return m_kcache_alloc_failed;} + int inc_rat_emitted() { return ++m_emitted_rat_instr;} + static void set_chipclass(r600_chip_class chip_class); private: @@ -234,6 +236,7 @@ private: int m_lds_group_requirement{0}; AluInstr *m_lds_group_start{nullptr}; static unsigned s_max_kcache_banks; + int m_emitted_rat_instr{0}; }; class InstrWithVectorResult : public Instr { diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp index 65e52caa33e..4b0d835de24 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_mem.cpp @@ -559,12 +559,6 @@ bool RatInstr::emit_ssbo_load(nir_intrinsic_instr *intr, Shader& shader) bool 
RatInstr::emit_ssbo_store(nir_intrinsic_instr *instr, Shader& shader) { - - /* Forche the scheduler to not move the preparation too far away, by starting - * a new block (TODO: better priority handling in the scheduler)*/ - if (nir_src_num_components(instr->src[0]) > 2) - shader.start_new_block(0); - auto &vf = shader.value_factory(); auto orig_addr = vf.src(instr->src[2], 0); diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_mem.h b/src/gallium/drivers/r600/sfn/sfn_instr_mem.h index 0aece6bac73..9d0132dc1af 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_mem.h +++ b/src/gallium/drivers/r600/sfn/sfn_instr_mem.h @@ -194,7 +194,6 @@ private: int m_element_size{3}; bool m_need_ack{false}; bool m_need_mark{false}; - }; diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.cpp b/src/gallium/drivers/r600/sfn/sfn_shader.cpp index ce2ca43012f..c0a68569ea5 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader.cpp @@ -1085,6 +1085,9 @@ void Shader::InstructionChain::visit(RatInstr *instr) if (prepare_mem_barrier) instr->set_ack(); + + if (this_shader->m_current_block->inc_rat_emitted() > 15) + this_shader->start_new_block(0); } void Shader::InstructionChain::apply(Instr *current, Instr **last) {