From 8f0b387d9444a29b9f34cd85b190ddbc38deb935 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 27 Sep 2022 15:28:48 -0700 Subject: [PATCH] intel/fs: Fix src and dst types of LOAD_PAYLOAD ACP entries during copy propagation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ACP entries created by copy propagation to track the implied copies of LOAD_PAYLOAD instructions don't model the behavior of LOAD_PAYLOAD correctly, since (as of 41868bb6824c6106a55c844) header moves are implicitly retyped to UD and the destination of non-header copies implicitly uses the same type as the corresponding source, even though the ACP entries created for such copies could incorrectly represent a type conversion, which can lead to mis-optimization of the program. According to Marcin, this fixes the func.mesh.ext.workgroup_id.task.q0 crucible test. Fixes: 41868bb6824c6106a55c844 ("i965/fs: Rework the fs_visitor LOAD_PAYLOAD instruction") Reported-by: Marcin Ślusarz Tested-by: Marcin Ślusarz Reviewed-by: Ian Romanick Part-of: (cherry picked from commit 7b5e9336298b00eeffeec738adbcc0acc7683868) --- .pick_status.json | 2 +- src/intel/compiler/brw_fs_copy_propagation.cpp | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 5a3d3d344ec..023ca79498b 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -265,7 +265,7 @@ "description": "intel/fs: Fix src and dst types of LOAD_PAYLOAD ACP entries during copy propagation.", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "41868bb6824c6106a55c8442006c1e2215abf567" }, diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 9819b5dcbca..457d80f6972 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -1085,9 +1085,11 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block, if (inst->src[i].file == VGRF || (inst->src[i].file == FIXED_GRF && inst->src[i].is_contiguous())) { + const brw_reg_type t = i < inst->header_size ? + BRW_REGISTER_TYPE_UD : inst->src[i].type; acp_entry *entry = rzalloc(copy_prop_ctx, acp_entry); - entry->dst = byte_offset(inst->dst, offset); - entry->src = inst->src[i]; + entry->dst = byte_offset(retype(inst->dst, t), offset); + entry->src = retype(inst->src[i], t); entry->size_written = size_written; entry->size_read = inst->size_read(i); entry->opcode = inst->opcode;