mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-06 13:10:10 +01:00
ac, aco, radv: Clarify LDS size on GFX6, and NGG shaders.
This information was wrong in some places, so let's fix it now. GFX6: The GPU has 64KB LDS, but only 32KB is usable by a workgroup. NGG: There was some misinformation about NGG only being able to address 32KB of LDS; it turns out this is not true, and it can address the full 64KB. Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21935>
This commit is contained in:
parent
edf30b1c6d
commit
4ae46840cd
5 changed files with 26 additions and 9 deletions
|
|
@ -1058,12 +1058,17 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info)
|
|||
}
|
||||
info->r600_has_virtual_memory = true;
|
||||
|
||||
/* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage above
|
||||
/* LDS is 64KB per CU (4 SIMDs on GFX6-9), which is 16KB per SIMD (usage above
|
||||
* 16KB makes some SIMDs unoccupied).
|
||||
*
|
||||
* LDS is 128KB in WGP mode and 64KB in CU mode. Assume the WGP mode is used.
|
||||
* GFX10+: LDS is 128KB in WGP mode and 64KB in CU mode. Assume the WGP mode is used.
|
||||
* GFX7+: Workgroups can use up to 64KB.
|
||||
* GFX6: There is 64KB LDS per CU, but a workgroup can only use up to 32KB.
|
||||
*/
|
||||
info->lds_size_per_workgroup = info->gfx_level >= GFX10 ? 128 * 1024 : 64 * 1024;
|
||||
info->lds_size_per_workgroup = info->gfx_level >= GFX10 ? 128 * 1024
|
||||
: info->gfx_level >= GFX7 ? 64 * 1024
|
||||
: 32 * 1024;
|
||||
|
||||
/* lds_encode_granularity is the block size used for encoding registers.
|
||||
* lds_alloc_granularity is what the hardware will align the LDS size to.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -113,6 +113,18 @@ Some instructions have a `_LEGACY` variant which implements "DX9 rules", in whic
|
|||
the zero "wins" in multiplications, ie. `0.0*x` is always `0.0`. The VEGA ISA
|
||||
mentions `V_MAC_LEGACY_F32` but this instruction is not really there on VEGA.
|
||||
|
||||
## LDS size and allocation granule
|
||||
|
||||
GFX7-8 ISA manuals are mistaken about the available LDS size.
|
||||
|
||||
* GFX7+ workgroups can use 64KB LDS.
|
||||
There is 64KB LDS per CU.
|
||||
* GFX6 workgroups can use 32KB LDS.
|
||||
There is 64KB LDS per CU, but a single workgroup can only use half of it.
|
||||
|
||||
Regarding the LDS allocation granule, Mesa has the correct details and
|
||||
the ISA manuals are mistaken.
|
||||
|
||||
## `m0` with LDS instructions on Vega and newer
|
||||
|
||||
The Vega ISA doc (both the old one and the "7nm" one) claims that LDS instructions
|
||||
|
|
|
|||
|
|
@ -254,8 +254,6 @@ setup_vs_variables(isel_context* ctx, nir_shader* nir)
|
|||
if (ctx->stage == vertex_ngg) {
|
||||
ctx->program->config->lds_size =
|
||||
DIV_ROUND_UP(nir->info.shared_size, ctx->program->dev.lds_encoding_granule);
|
||||
assert((ctx->program->config->lds_size * ctx->program->dev.lds_encoding_granule) <
|
||||
(32 * 1024));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -285,8 +283,6 @@ setup_tes_variables(isel_context* ctx, nir_shader* nir)
|
|||
if (ctx->stage == tess_eval_ngg) {
|
||||
ctx->program->config->lds_size =
|
||||
DIV_ROUND_UP(nir->info.shared_size, ctx->program->dev.lds_encoding_granule);
|
||||
assert((ctx->program->config->lds_size * ctx->program->dev.lds_encoding_granule) <
|
||||
(32 * 1024));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -295,7 +291,6 @@ setup_ms_variables(isel_context* ctx, nir_shader* nir)
|
|||
{
|
||||
ctx->program->config->lds_size =
|
||||
DIV_ROUND_UP(nir->info.shared_size, ctx->program->dev.lds_encoding_granule);
|
||||
assert((ctx->program->config->lds_size * ctx->program->dev.lds_encoding_granule) < (32 * 1024));
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -101,7 +101,10 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
|||
program->dev.lds_encoding_granule = gfx_level >= GFX11 && stage == fragment_fs ? 1024 :
|
||||
gfx_level >= GFX7 ? 512 : 256;
|
||||
program->dev.lds_alloc_granule = gfx_level >= GFX10_3 ? 1024 : program->dev.lds_encoding_granule;
|
||||
|
||||
/* GFX6: There is 64KB LDS per CU, but a single workgroup can only use 32KB. */
|
||||
program->dev.lds_limit = gfx_level >= GFX7 ? 65536 : 32768;
|
||||
|
||||
/* apparently gfx702 also has 16-bank LDS but I can't find a family for that */
|
||||
program->dev.has_16bank_lds = family == CHIP_KABINI || family == CHIP_STONEY;
|
||||
|
||||
|
|
|
|||
|
|
@ -137,7 +137,9 @@ radv_null_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info)
|
|||
else
|
||||
info->num_physical_wave64_vgprs_per_simd = 256;
|
||||
info->num_simd_per_compute_unit = info->gfx_level >= GFX10 ? 2 : 4;
|
||||
info->lds_size_per_workgroup = info->gfx_level >= GFX10 ? 128 * 1024 : 64 * 1024;
|
||||
info->lds_size_per_workgroup = info->gfx_level >= GFX10 ? 128 * 1024
|
||||
: info->gfx_level >= GFX7 ? 64 * 1024
|
||||
: 32 * 1024;
|
||||
info->lds_encode_granularity = info->gfx_level >= GFX7 ? 128 * 4 : 64 * 4;
|
||||
info->lds_alloc_granularity =
|
||||
info->gfx_level >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue