brw: fix broadcast opcode

The problem with the current code is that there is a disconnect between:
   - the virtual register size allocated
   - the dispatch size
   - the size_written value

Only the last two are in sync, which confuses the spiller: it looks
only at the destination register allocation and the dispatch size to
figure out how much to spill.

The solution in this change is to make BROADCAST more like
MOV_INDIRECT, so that you can do a BROADCAST(8) that actually reads a
SIMD32 register. The size of the register being read is passed as an
immediate in src2.

Now the spiller sees correct read/write sizes just by looking at the
destination register and dispatch size.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: 662339a2ff ("brw/build: Use SIMD8 temporaries in emit_uniformize")
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13614
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36564>
This commit is contained in:
Lionel Landwerlin 2025-08-04 09:27:33 +03:00
parent 57484f6202
commit 93996c07e2
3 changed files with 8 additions and 3 deletions

View file

@ -879,8 +879,8 @@ public:
/* BROADCAST will only write a single component after lowering. Munge
* size_written here to match the allocated size of dst.
*/
exec_all().emit(SHADER_OPCODE_BROADCAST, dst, value, index)
->size_written = dst.component_size(xbld.dispatch_width());
xbld.emit(SHADER_OPCODE_BROADCAST, dst, value, index,
brw_imm_ud(value.component_size(_dispatch_width)));
return component(dst, 0);
}

View file

@ -361,6 +361,10 @@ enum opcode {
* Return the index of the first enabled live channel and assign it to
* the first component of the destination. Frequently used as input
* for the BROADCAST pseudo-opcode.
*
* Source 0: A value.
* Source 1: Index into the value to broadcast.
* Source 2: Size in bytes of the value register.
*/
SHADER_OPCODE_FIND_LIVE_CHANNEL,

View file

@ -188,7 +188,6 @@ brw_inst::is_control_source(unsigned arg) const
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
return arg == 0;
case SHADER_OPCODE_BROADCAST:
case SHADER_OPCODE_SHUFFLE:
case SHADER_OPCODE_QUAD_SWIZZLE:
return arg == 1;
@ -198,6 +197,7 @@ brw_inst::is_control_source(unsigned arg) const
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
return arg == INTERP_SRC_MSG_DESC || arg == INTERP_SRC_NOPERSPECTIVE;
case SHADER_OPCODE_BROADCAST:
case SHADER_OPCODE_MOV_INDIRECT:
case SHADER_OPCODE_CLUSTER_BROADCAST:
return arg == 1 || arg == 2;
@ -538,6 +538,7 @@ brw_inst::size_read(const struct intel_device_info *devinfo, int arg) const
case SHADER_OPCODE_BARRIER:
return REG_SIZE;
case SHADER_OPCODE_BROADCAST:
case SHADER_OPCODE_MOV_INDIRECT:
if (arg == 0) {
assert(src[2].file == IMM);