mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 21:50:12 +01:00
aco: align scratch size during assembly
This lets us use less scratch if both VGPR spilling and scratch intrinsics are used.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20534>
This commit is contained in:
parent
c9846158cd
commit
810ced93f3
3 changed files with 5 additions and 4 deletions
|
|
@@ -1154,6 +1154,9 @@ emit_program(Program* program, std::vector<uint32_t>& code)
|
|||
code.insert(code.end(), (uint32_t*)program->constant_data.data(),
|
||||
(uint32_t*)(program->constant_data.data() + program->constant_data.size()));
|
||||
|
||||
+program->config->scratch_bytes_per_wave = align(
|
||||
+program->config->scratch_bytes_per_wave, program->dev.scratch_alloc_granule);
|
||||
|
||||
return exec_size;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -903,8 +903,7 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c
|
|||
for (unsigned i = 0; i < shader_count; i++)
|
||||
scratch_size = std::max(scratch_size, shaders[i]->scratch_size);
|
||||
|
||||
-ctx.program->config->scratch_bytes_per_wave =
|
||||
-align(scratch_size * ctx.program->wave_size, ctx.program->dev.scratch_alloc_granule);
|
||||
+ctx.program->config->scratch_bytes_per_wave = scratch_size * ctx.program->wave_size;
|
||||
|
||||
unsigned nir_num_blocks = 0;
|
||||
for (unsigned i = 0; i < shader_count; i++)
|
||||
|
|
|
|||
|
|
@@ -1856,8 +1856,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
|
|||
}
|
||||
|
||||
/* update required scratch memory */
|
||||
-ctx.program->config->scratch_bytes_per_wave += align(
|
||||
-ctx.vgpr_spill_slots * 4 * ctx.program->wave_size, ctx.program->dev.scratch_alloc_granule);
|
||||
+ctx.program->config->scratch_bytes_per_wave += ctx.vgpr_spill_slots * 4 * ctx.program->wave_size;
|
||||
|
||||
/* SSA elimination inserts copies for logical phis right before p_logical_end
|
||||
* So if a linear vgpr is used between that p_logical_end and the branch,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue