mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-06 06:40:08 +01:00
aco: consider that GFX10.3 allocates LDS in 1024 byte blocks
fossil-db (GFX10.3):
Totals from 3 (0.00% of 139391) affected shaders:
VMEM: 513 -> 511 (-0.39%)
SMEM: 94 -> 92 (-2.13%)
VClause: 31 -> 30 (-3.23%)

fossil-db (GFX10.3, wave32):
Totals from 4 (0.00% of 139391) affected shaders:
VClause: 82 -> 81 (-1.22%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8761>
This commit is contained in:
parent
7ff805a19d
commit
b759557cac
4 changed files with 10 additions and 10 deletions
|
|
@ -398,8 +398,7 @@ setup_vs_variables(isel_context *ctx, nir_shader *nir)
|
|||
if (ctx->stage == vertex_ngg && ctx->args->options->key.vs_common_out.export_prim_id) {
|
||||
/* We need to store the primitive IDs in LDS */
|
||||
unsigned lds_size = ctx->program->info->ngg_info.esgs_ring_size;
|
||||
ctx->program->config->lds_size = (lds_size + ctx->program->lds_alloc_granule - 1) /
|
||||
ctx->program->lds_alloc_granule;
|
||||
ctx->program->config->lds_size = DIV_ROUND_UP(lds_size, ctx->program->lds_encoding_granule);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -424,7 +423,7 @@ void setup_gs_variables(isel_context *ctx, nir_shader *nir)
|
|||
unsigned total_lds_bytes = esgs_ring_bytes + ngg_emit_bytes + ngg_gs_scratch_bytes;
|
||||
assert(total_lds_bytes >= ctx->ngg_gs_emit_addr);
|
||||
assert(total_lds_bytes >= ctx->ngg_gs_scratch_addr);
|
||||
ctx->program->config->lds_size = (total_lds_bytes + ctx->program->lds_alloc_granule - 1) / ctx->program->lds_alloc_granule;
|
||||
ctx->program->config->lds_size = DIV_ROUND_UP(total_lds_bytes, ctx->program->lds_encoding_granule);
|
||||
|
||||
/* Make sure we have enough room for emitted GS vertices */
|
||||
if (nir->info.gs.vertices_out)
|
||||
|
|
@ -488,8 +487,7 @@ setup_tcs_info(isel_context *ctx, nir_shader *nir, nir_shader *vs)
|
|||
|
||||
ctx->args->shader_info->tcs.num_patches = ctx->tcs_num_patches;
|
||||
ctx->args->shader_info->tcs.num_lds_blocks = lds_size;
|
||||
ctx->program->config->lds_size = (lds_size + ctx->program->lds_alloc_granule - 1) /
|
||||
ctx->program->lds_alloc_granule;
|
||||
ctx->program->config->lds_size = DIV_ROUND_UP(lds_size, ctx->program->lds_encoding_granule);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -520,8 +518,7 @@ setup_variables(isel_context *ctx, nir_shader *nir)
|
|||
break;
|
||||
}
|
||||
case MESA_SHADER_COMPUTE: {
|
||||
ctx->program->config->lds_size = (nir->info.cs.shared_size + ctx->program->lds_alloc_granule - 1) /
|
||||
ctx->program->lds_alloc_granule;
|
||||
ctx->program->config->lds_size = DIV_ROUND_UP(nir->info.cs.shared_size, ctx->program->lds_encoding_granule);
|
||||
break;
|
||||
}
|
||||
case MESA_SHADER_VERTEX: {
|
||||
|
|
@ -544,7 +541,7 @@ setup_variables(isel_context *ctx, nir_shader *nir)
|
|||
}
|
||||
|
||||
/* Make sure we fit the available LDS space. */
|
||||
assert((ctx->program->config->lds_size * ctx->program->lds_alloc_granule) <= ctx->program->lds_limit);
|
||||
assert((ctx->program->config->lds_size * ctx->program->lds_encoding_granule) <= ctx->program->lds_limit);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -93,7 +93,8 @@ void init_program(Program *program, Stage stage, struct radv_shader_info *info,
|
|||
program->wave_size = info->wave_size;
|
||||
program->lane_mask = program->wave_size == 32 ? s1 : s2;
|
||||
|
||||
program->lds_alloc_granule = chip_class >= GFX7 ? 512 : 256;
|
||||
program->lds_encoding_granule = chip_class >= GFX7 ? 512 : 256;
|
||||
program->lds_alloc_granule = chip_class >= GFX10_3 ? 1024 : program->lds_encoding_granule;
|
||||
program->lds_limit = chip_class >= GFX7 ? 65536 : 32768;
|
||||
/* apparently gfx702 also has 16-bank LDS but I can't find a family for that */
|
||||
program->has_16bank_lds = family == CHIP_KABINI || family == CHIP_STONEY;
|
||||
|
|
|
|||
|
|
@ -1817,6 +1817,7 @@ public:
|
|||
Temp scratch_offset;
|
||||
|
||||
uint16_t min_waves = 0;
|
||||
uint16_t lds_encoding_granule;
|
||||
uint16_t lds_alloc_granule;
|
||||
uint32_t lds_limit; /* in bytes */
|
||||
bool has_16bank_lds;
|
||||
|
|
|
|||
|
|
@ -353,7 +353,8 @@ void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand)
|
|||
unsigned waves_per_workgroup = calc_waves_per_workgroup(program);
|
||||
unsigned workgroups_per_cu_wgp = max_waves_per_simd * simd_per_cu_wgp / waves_per_workgroup;
|
||||
if (program->config->lds_size) {
|
||||
unsigned lds = program->config->lds_size * program->lds_alloc_granule;
|
||||
unsigned lds = program->config->lds_size * program->lds_encoding_granule;
|
||||
lds = align(lds, program->lds_alloc_granule);
|
||||
workgroups_per_cu_wgp = std::min(workgroups_per_cu_wgp, lds_limit / lds);
|
||||
}
|
||||
if (waves_per_workgroup > 1 && program->chip_class < GFX10)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue