diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index 0816370e25a..bb4ae1bcad7 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -1154,6 +1154,9 @@ emit_program(Program* program, std::vector& code) code.insert(code.end(), (uint32_t*)program->constant_data.data(), (uint32_t*)(program->constant_data.data() + program->constant_data.size())); + program->config->scratch_bytes_per_wave = align( + program->config->scratch_bytes_per_wave, program->dev.scratch_alloc_granule); + return exec_size; } diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 3ff7f50bf0f..abde629d0e5 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -903,8 +903,7 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c for (unsigned i = 0; i < shader_count; i++) scratch_size = std::max(scratch_size, shaders[i]->scratch_size); - ctx.program->config->scratch_bytes_per_wave = - align(scratch_size * ctx.program->wave_size, ctx.program->dev.scratch_alloc_granule); + ctx.program->config->scratch_bytes_per_wave = scratch_size * ctx.program->wave_size; unsigned nir_num_blocks = 0; for (unsigned i = 0; i < shader_count; i++) diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp index 49a4370aa47..3cc3b23074f 100644 --- a/src/amd/compiler/aco_spill.cpp +++ b/src/amd/compiler/aco_spill.cpp @@ -1856,8 +1856,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) } /* update required scratch memory */ - ctx.program->config->scratch_bytes_per_wave += align( - ctx.vgpr_spill_slots * 4 * ctx.program->wave_size, ctx.program->dev.scratch_alloc_granule); + ctx.program->config->scratch_bytes_per_wave += ctx.vgpr_spill_slots * 4 * ctx.program->wave_size; /* SSA elimination inserts copies for logical phis right before p_logical_end * So if a linear vgpr is used between that p_logical_end and the branch,