From da1108dcc44cc59a9e358c2bd0f19ffd54209208 Mon Sep 17 00:00:00 2001 From: Patrick Lerda Date: Mon, 29 Dec 2025 13:36:39 +0100 Subject: [PATCH] r600: fix rv770 dot4 operations Reading a PV register other than PV.x after a dot4 operation does not work on rv770. It does work on evergreen, but that behavior is not documented. This change makes the scheduler always use PV.x after such an operation on all r600 GPUs, which fixes the issue on rv770. It also covers max4, which has the same requirement, in case max4 gets implemented. Here are some of the affected tests on rv770: piglit/bin/fp-abs-01 -auto -fbo glcts --deqp-case=KHR-GL31.buffer_objects.triangles piglit/bin/shader_runner generated_tests/spec/glsl-1.10/execution/built-in-functions/fs-distance-vec2-vec2.shader_test -auto -fbo Fixes: 942e6af40bfc ("r600/sfn: use PS and PV inline registers when possible") Signed-off-by: Patrick Lerda Part-of: --- .../drivers/r600/sfn/sfn_scheduler.cpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp index c6c3b977260..6ee7af82114 100644 --- a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp @@ -867,9 +867,26 @@ void BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks) void BlockScheduler::apply_pv_ps_to_group(AluGroup& group, AluGroup& prev_group) { + bool need_force_pvx = false; + + if ((prev_group.free_slot_mask() & 0xf) == 0x0 && + !prev_group[0]->has_alu_flag(alu_is_lds)) { + switch (prev_group[0]->opcode()) { + case op2_dot4: + case op2_dot4_ieee: + case op1_max4: + need_force_pvx = true; + break; + default: + break; + } + } for (int i = 0; i < 4; ++i) - apply_pv_ps_to_instr(group, prev_group[i], ALU_SRC_PV, i); + apply_pv_ps_to_instr(group, + prev_group[i], + ALU_SRC_PV, + unlikely(need_force_pvx) ? 0 : i); if (prev_group.has_t()) apply_pv_ps_to_instr(group, prev_group[4], ALU_SRC_PS, 0);