aco: align scratch size during assembly

This lets us use less scratch if both VGPR spilling and scratch intrinsics
are used.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20534>
This commit is contained in:
Rhys Perry 2023-01-05 14:01:21 +00:00 committed by Marge Bot
parent c9846158cd
commit 810ced93f3
3 changed files with 5 additions and 4 deletions

View file

@@ -1154,6 +1154,9 @@ emit_program(Program* program, std::vector<uint32_t>& code)
 code.insert(code.end(), (uint32_t*)program->constant_data.data(),
 (uint32_t*)(program->constant_data.data() + program->constant_data.size()));
+program->config->scratch_bytes_per_wave = align(
+program->config->scratch_bytes_per_wave, program->dev.scratch_alloc_granule);
 return exec_size;
 }

View file

@@ -903,8 +903,7 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c
 for (unsigned i = 0; i < shader_count; i++)
 scratch_size = std::max(scratch_size, shaders[i]->scratch_size);
-ctx.program->config->scratch_bytes_per_wave =
-align(scratch_size * ctx.program->wave_size, ctx.program->dev.scratch_alloc_granule);
+ctx.program->config->scratch_bytes_per_wave = scratch_size * ctx.program->wave_size;
 unsigned nir_num_blocks = 0;
 for (unsigned i = 0; i < shader_count; i++)

View file

@@ -1856,8 +1856,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
 }
 /* update required scratch memory */
-ctx.program->config->scratch_bytes_per_wave += align(
-ctx.vgpr_spill_slots * 4 * ctx.program->wave_size, ctx.program->dev.scratch_alloc_granule);
+ctx.program->config->scratch_bytes_per_wave += ctx.vgpr_spill_slots * 4 * ctx.program->wave_size;
 /* SSA elimination inserts copies for logical phis right before p_logical_end
  * So if a linear vgpr is used between that p_logical_end and the branch,