From 88ae449dbc8698b62ba586525efeb46ed366118c Mon Sep 17 00:00:00 2001 From: Patrick Lerda Date: Mon, 29 Dec 2025 13:36:39 +0100 Subject: [PATCH] r600: fix rv770 dot4 operations After a dot4 operation, reading the result through a PV register other than PV.x does not work on rv770. This happens to work on evergreen, but that behavior is not documented. This change always uses PV.x in this case on all r600 GPUs, which fixes the issue on rv770. It also covers max4, which has the same requirement, in case max4 is ever implemented. Here are some of the affected tests on rv770: piglit/bin/fp-abs-01 -auto -fbo glcts --deqp-case=KHR-GL31.buffer_objects.triangles piglit/bin/shader_runner generated_tests/spec/glsl-1.10/execution/built-in-functions/fs-distance-vec2-vec2.shader_test -auto -fbo Fixes: 942e6af40bfc ("r600/sfn: use PS and PV inline registers when possible") Signed-off-by: Patrick Lerda Part-of: (cherry picked from commit da1108dcc44cc59a9e358c2bd0f19ffd54209208) --- .pick_status.json | 2 +- .../drivers/r600/sfn/sfn_scheduler.cpp | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 7919ba77656..3990ef261ee 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -2054,7 +2054,7 @@ "description": "r600: fix rv770 dot4 operations", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "942e6af40bfc0baefa52217ffb60a2d42adb6c25", "notes": null diff --git a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp index c6c3b977260..6ee7af82114 100644 --- a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp @@ -867,9 +867,26 @@ void BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks) void BlockScheduler::apply_pv_ps_to_group(AluGroup& group, AluGroup& prev_group) { + bool need_force_pvx = false; + + if ((prev_group.free_slot_mask() & 0xf) == 0x0 && 
!prev_group[0]->has_alu_flag(alu_is_lds)) { + switch (prev_group[0]->opcode()) { + case op2_dot4: + case op2_dot4_ieee: + case op1_max4: + need_force_pvx = true; + break; + default: + break; + } + } for (int i = 0; i < 4; ++i) - apply_pv_ps_to_instr(group, prev_group[i], ALU_SRC_PV, i); + apply_pv_ps_to_instr(group, + prev_group[i], + ALU_SRC_PV, + unlikely(need_force_pvx) ? 0 : i); if (prev_group.has_t()) apply_pv_ps_to_instr(group, prev_group[4], ALU_SRC_PS, 0);