From 0cfabe0613cd693fc29dab72efc5fbe5e720401a Mon Sep 17 00:00:00 2001 From: Natalie Vock Date: Tue, 18 Nov 2025 15:34:25 +0100 Subject: [PATCH] aco/lower_to_hw_instr: Add scratch size in call lowering We did this in the preserved spiller previously, but let's move it here. Part-of: --- src/amd/compiler/aco_lower_to_hw_instr.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index db771cf2973..61e7913c62e 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -2944,10 +2944,14 @@ lower_to_hw_instr(Program* program) } else if (instr->isCall()) { unsigned extra_param_count = 2; PhysReg stack_reg = instr->operands[0].physReg(); + unsigned scratch_size = ctx.program->config->scratch_bytes_per_wave; + if (ctx.program->gfx_level >= GFX9) + scratch_size /= ctx.program->wave_size; - if (instr->operands[1].constantValue()) { + if (instr->operands[1].constantValue() || scratch_size) { bld.sop2(aco_opcode::s_add_u32, Definition(stack_reg, s1), Definition(scc, s1), - Operand(stack_reg, s1), instr->operands[1]); + Operand(stack_reg, s1), + Operand::c32(instr->operands[1].constantValue() + scratch_size)); if (program->gfx_level < GFX9) { /* The callee's VGPR spill buffer resource needs to be based at the * start of callee scratch. @@ -2961,9 +2965,10 @@ lower_to_hw_instr(Program* program) bld.sop1(aco_opcode::s_swappc_b64, Definition(instr->definitions[0].physReg(), s2), Operand(instr->operands[extra_param_count + 1].physReg(), s2)); - if (instr->operands[1].constantValue()) { + if (instr->operands[1].constantValue() || scratch_size) { bld.sop2(aco_opcode::s_sub_u32, Definition(stack_reg, s1), Definition(scc, s1), - Operand(stack_reg, s1), instr->operands[1]); + Operand(stack_reg, s1), + Operand::c32(instr->operands[1].constantValue() + scratch_size)); if (program->gfx_level < GFX9) { PhysReg rsrc_dword1 = stack_reg.advance(4); bld.sop2(aco_opcode::s_subb_u32, Definition(rsrc_dword1, s1), Definition(scc, s1),