From 93996c07e2230711a862b01a39a795d300298583 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 4 Aug 2025 09:27:33 +0300 Subject: [PATCH] brw: fix broadcast opcode The problem with the current code is that there is a disconnect between : - the virtual register size allocated - the dispatch size - the size_written value Only the last 2 are in sync and this confuses the spiller that only looks at the destination register allocation & dispatch size to figure out how much to spill. The solution in this change is to make BROADCAST more like MOV_INDIRECT, so that you can do a BROADCAST(8) that actually reads a SIMD32 register. We put the size of the register read into src2. Now the spiller sees correct read/write sizes just looking at the destination register & dispatch size. Signed-off-by: Lionel Landwerlin Fixes: 662339a2ff ("brw/build: Use SIMD8 temporaries in emit_uniformize") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13614 Reviewed-by: Ian Romanick Part-of: --- src/intel/compiler/brw_builder.h | 4 ++-- src/intel/compiler/brw_eu_defines.h | 4 ++++ src/intel/compiler/brw_inst.cpp | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/intel/compiler/brw_builder.h b/src/intel/compiler/brw_builder.h index 199460d6cb3..39f07b1600a 100644 --- a/src/intel/compiler/brw_builder.h +++ b/src/intel/compiler/brw_builder.h @@ -879,8 +879,8 @@ public: /* BROADCAST will only write a single component after lowering. Munge * size_written here to match the allocated size of dst. 
*/ - exec_all().emit(SHADER_OPCODE_BROADCAST, dst, value, index) - ->size_written = dst.component_size(xbld.dispatch_width()); + xbld.emit(SHADER_OPCODE_BROADCAST, dst, value, index, + brw_imm_ud(value.component_size(_dispatch_width))); return component(dst, 0); } diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index ac8d77bb136..1b57739b07a 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -361,6 +361,10 @@ enum opcode { * Return the index of the first enabled live channel and assign it to * to the first component of the destination. Frequently used as input * for the BROADCAST pseudo-opcode. + * + * BROADCAST source 0: The value to broadcast. + * BROADCAST source 1: Index into source 0 of the value to broadcast. + * BROADCAST source 2: Size in bytes of the source 0 register read (immediate). */ SHADER_OPCODE_FIND_LIVE_CHANNEL, diff --git a/src/intel/compiler/brw_inst.cpp b/src/intel/compiler/brw_inst.cpp index fb9648e4852..b66b9d9badc 100644 --- a/src/intel/compiler/brw_inst.cpp +++ b/src/intel/compiler/brw_inst.cpp @@ -188,7 +188,6 @@ brw_inst::is_control_source(unsigned arg) const case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: return arg == 0; - case SHADER_OPCODE_BROADCAST: case SHADER_OPCODE_SHUFFLE: case SHADER_OPCODE_QUAD_SWIZZLE: return arg == 1; @@ -198,6 +197,7 @@ brw_inst::is_control_source(unsigned arg) const case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: return arg == INTERP_SRC_MSG_DESC || arg == INTERP_SRC_NOPERSPECTIVE; + case SHADER_OPCODE_BROADCAST: case SHADER_OPCODE_MOV_INDIRECT: case SHADER_OPCODE_CLUSTER_BROADCAST: return arg == 1 || arg == 2; @@ -538,6 +538,7 @@ brw_inst::size_read(const struct intel_device_info *devinfo, int arg) const case SHADER_OPCODE_BARRIER: return REG_SIZE; + case SHADER_OPCODE_BROADCAST: case SHADER_OPCODE_MOV_INDIRECT: if (arg == 0) { assert(src[2].file == IMM);