jay/lower_spill: use 1 less temporary

Spill lowering needs to reserve some registers, but the whole 1.1 GRF business
will be tricky to deal with for future partitioning changes. Fortunately, we can
compact things a bit, using exactly 1 GRF in SIMD16 mode.

SIMD16:

   Totals:
   Instrs: 2752302 -> 2753102 (+0.03%); split: -0.01%, +0.03%
   CodeSize: 41067280 -> 41075568 (+0.02%); split: -0.01%, +0.03%

   Totals from 27 (1.02% of 2647) affected shaders:
   Instrs: 402012 -> 402812 (+0.20%); split: -0.04%, +0.24%
   CodeSize: 6094752 -> 6103040 (+0.14%); split: -0.04%, +0.18%

SIMD32:

   Totals:
   Instrs: 4570539 -> 4572379 (+0.04%); split: -0.09%, +0.13%
   CodeSize: 68437760 -> 68450816 (+0.02%); split: -0.11%, +0.13%

   Totals from 478 (18.06% of 2647) affected shaders:
   Instrs: 3147314 -> 3149154 (+0.06%); split: -0.13%, +0.19%
   CodeSize: 47446400 -> 47459456 (+0.03%); split: -0.16%, +0.19%

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41808>
This commit is contained in:
Alyssa Rosenzweig 2026-05-20 14:45:58 -04:00 committed by Marge Bot
parent 1e7f68d11b
commit 64acab1d69
2 changed files with 14 additions and 13 deletions

View file

@ -53,10 +53,9 @@ jay_lower_spill(jay_function *func)
/* We reserve the top UGPRs for spilling by ABI */
unsigned ugpr_reservation = func->shader->num_regs[UGPR];
assert(util_is_aligned(ugpr_reservation + 1, func->shader->dispatch_width));
assert(util_is_aligned(ugpr_reservation, func->shader->dispatch_width));
jay_def surf = jay_bare_reg(UGPR, ugpr_reservation);
jay_def sp = jay_bare_reg(UGPR, ugpr_reservation + 1);
jay_def sp = jay_bare_reg(UGPR, ugpr_reservation);
sp.num_values_m1 = func->shader->dispatch_width - 1;
/* Calculate how much stack space we need */
@ -76,8 +75,9 @@ jay_lower_spill(jay_function *func)
* TODO: Need ABI for multi-function.
*/
assert(func->is_entrypoint);
jay_AND(&b, JAY_TYPE_U32, surf, jay_bare_reg(UGPR, 5), ~BITFIELD_MASK(10));
jay_SHR(&b, JAY_TYPE_U32, ADDRESS_REG, surf, 4);
jay_def tmpu = jay_bare_reg(UGPR, ugpr_reservation);
jay_AND(&b, JAY_TYPE_U32, tmpu, jay_bare_reg(UGPR, 5), ~BITFIELD_MASK(10));
jay_SHR(&b, JAY_TYPE_U32, ADDRESS_REG, tmpu, 4);
/* We use a 32-bit strided stack: SP = scratch + (lane ID * 4) */
jay_def tmp2 = jay_bare_reg(GPR, func->shader->partition.base2);
@ -104,7 +104,8 @@ jay_lower_spill(jay_function *func)
if (I->op == JAY_OPCODE_MOV && jay_is_send_like(I)) {
if (!address_valid) {
jay_SHR(&b, JAY_TYPE_U32, ADDRESS_REG, surf, 4);
jay_MOV(&b, ADDRESS_REG, tmpu);
jay_MOV(&b, tmpu, b.shader->scratch_size);
address_valid = true;
}
@ -118,9 +119,8 @@ jay_lower_spill(jay_function *func)
jay_remove_instruction(I);
} else if (I->op == JAY_OPCODE_SHUFFLE) {
/* Shuffles implicitly clobber the address register so we'll need to
* rematerialize the surface state (but be lazy).
*/
/* Shuffles implicitly clobber the address register. Spill it. */
jay_MOV(&b, tmpu, ADDRESS_REG);
address_valid = false;
}
}
@ -128,7 +128,8 @@ jay_lower_spill(jay_function *func)
/* Canonicalize our internal registers at block boundaries */
if (jay_num_successors(block, GPR) > 0) {
if (!address_valid) {
jay_SHR(&b, JAY_TYPE_U32, ADDRESS_REG, surf, 4);
jay_MOV(&b, ADDRESS_REG, tmpu);
jay_MOV(&b, tmpu, b.shader->scratch_size);
}
if (sp_delta_B > 0) {

View file

@ -1460,7 +1460,7 @@ jay_partition_grf(jay_shader *shader)
*/
jay_foreach_preload(jay_shader_get_entrypoint(shader), I) {
unsigned end = jay_preload_reg(I) + jay_num_values(I->dst);
unsigned extra = I->dst.file == UGPR ? shader->dispatch_width + 1 : 0;
unsigned extra = I->dst.file == UGPR ? shader->dispatch_width : 0;
assert(I->dst.file < JAY_NUM_GRF_FILES);
demand[I->dst.file] = MAX2(demand[I->dst.file], end + extra);
}
@ -1561,7 +1561,7 @@ jay_register_allocate_function(jay_function *f)
if (spilled) {
/* Spilling requires reserving UGPRs for spilling */
unsigned reservation = f->shader->dispatch_width + 1;
unsigned reservation = f->shader->dispatch_width;
f->shader->num_regs[UGPR] -= reservation;
f->shader->partition.large_ugpr_block.len -= reservation;
@ -1685,7 +1685,7 @@ jay_register_allocate_function(jay_function *f)
*/
if (spilled) {
jay_lower_spill(f);
f->shader->num_regs[UGPR] += f->shader->dispatch_width + 1;
f->shader->num_regs[UGPR] += f->shader->dispatch_width;
}
}