From 267502f9f3fe4de6268168acacee3887ce788cfa Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 20 Feb 2025 15:23:04 +0200 Subject: [PATCH] brw: fix spilling for Xe2+ The problem occurs with a series of instructions build the subgroup invocation value : mov(8) g23<1>UW 0x76543210V add(8) g23.8<1>UW g23<8,8,1>UW 0x0008UW add(16) g23.16<1>UW g23<16,16,1>UW 0x0010UW Our register spilling code operates on physical registers (64B on Xe2+) and using the brw_inst::is_partial_write() helper only considers 32B registers. So the spiller doesn't see that the add(16) instruction is doing a partial write and ends up discarding the previous value. You can reproduce the issue by running a test like : INTEL_DEBUG=spill_fs ./deqp-vk -n dEQP-VK.compute.pipeline.cooperative_matrix.khr_a.subgroupscope.constant.uint8_uint8.buffer.rowmajor.linear Signed-off-by: Lionel Landwerlin Fixes: aa494cbacf ("brw: align spilling offsets to physical register sizes") Reviewed-by: Paulo Zanoni Part-of: (cherry picked from commit c60180ba6333c6f6edcd2bd6a0166334082bc378) --- .pick_status.json | 2 +- src/intel/compiler/brw_fs.cpp | 6 +++--- src/intel/compiler/brw_ir_fs.h | 2 +- src/intel/compiler/brw_reg_allocate.cpp | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index b65fa093df6..1abdf577bc0 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -964,7 +964,7 @@ "description": "brw: fix spilling for Xe2+", "nominated": true, "nomination_type": 2, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "aa494cbacf3bfa57163bbed8b5552ad25434e713", "notes": null diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index d0400fc7eb8..d63cef0fdb5 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -482,7 +482,7 @@ fs_visitor::limit_dispatch_width(unsigned n, const char *msg) * it. */ bool -fs_inst::is_partial_write() const +fs_inst::is_partial_write(unsigned grf_size) const { if (this->predicate && !this->predicate_trivial && this->opcode != BRW_OPCODE_SEL) @@ -491,10 +491,10 @@ fs_inst::is_partial_write() const if (!this->dst.is_contiguous()) return true; - if (this->dst.offset % REG_SIZE != 0) + if (this->dst.offset % grf_size != 0) return true; - return this->size_written % REG_SIZE != 0; + return this->size_written % grf_size != 0; } unsigned diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index 2cfb15e7167..1c2b8cfbecd 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -55,7 +55,7 @@ public: bool is_send_from_grf() const; bool is_payload(unsigned arg) const; - bool is_partial_write() const; + bool is_partial_write(unsigned grf_size = REG_SIZE) const; unsigned components_read(unsigned i) const; unsigned size_read(const struct intel_device_info *devinfo, int arg) const; bool can_do_source_mods(const struct intel_device_info *devinfo) const; diff --git a/src/intel/compiler/brw_reg_allocate.cpp b/src/intel/compiler/brw_reg_allocate.cpp index e17b28e10b5..e476c0566e1 100644 --- a/src/intel/compiler/brw_reg_allocate.cpp +++ b/src/intel/compiler/brw_reg_allocate.cpp @@ -1234,7 +1234,7 @@ brw_reg_alloc::spill_reg(unsigned spill_reg) * write, there should be no need for the unspill since the * instruction will be overwriting the whole destination in any case. */ - if (inst->is_partial_write() || + if (inst->is_partial_write(reg_unit(devinfo) * REG_SIZE) || (!inst->force_writemask_all && !per_channel)) emit_unspill(ubld, &fs->shader_stats, spill_src, subset_spill_offset, regs_written(inst), ip);