mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-14 14:28:08 +02:00
radeonsi/gfx10: fix wave occupancy computations
Cc: 19.2 <mesa-stable@lists.freedesktop.org> Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
This commit is contained in:
parent
42ea0b7b52
commit
d95afd8b9e
4 changed files with 50 additions and 22 deletions
|
|
@ -187,7 +187,7 @@ unsigned ac_get_compute_resource_limits(struct radeon_info *info,
|
||||||
unsigned max_waves_per_sh,
|
unsigned max_waves_per_sh,
|
||||||
unsigned threadgroups_per_cu);
|
unsigned threadgroups_per_cu);
|
||||||
|
|
||||||
static inline unsigned ac_get_max_simd_waves(enum radeon_family family)
|
static inline unsigned ac_get_max_wave64_per_simd(enum radeon_family family)
|
||||||
{
|
{
|
||||||
|
|
||||||
switch (family) {
|
switch (family) {
|
||||||
|
|
@ -202,10 +202,26 @@ static inline unsigned ac_get_max_simd_waves(enum radeon_family family)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uint32_t
|
static inline unsigned ac_get_num_physical_vgprs(enum chip_class chip_class,
|
||||||
ac_get_num_physical_sgprs(enum chip_class chip_class)
|
unsigned wave_size)
|
||||||
{
|
{
|
||||||
return chip_class >= GFX8 ? 800 : 512;
|
/* The number is per SIMD. */
|
||||||
|
if (chip_class >= GFX10)
|
||||||
|
return wave_size == 32 ? 1024 : 512;
|
||||||
|
else
|
||||||
|
return 256;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uint32_t
|
||||||
|
ac_get_num_physical_sgprs(const struct radeon_info *info)
|
||||||
|
{
|
||||||
|
/* The number is per SIMD. There is enough SGPRs for the maximum number
|
||||||
|
* of Wave32, which is double the number for Wave64.
|
||||||
|
*/
|
||||||
|
if (info->chip_class >= GFX10)
|
||||||
|
return 128 * ac_get_max_wave64_per_simd(info->family) * 2;
|
||||||
|
|
||||||
|
return info->chip_class >= GFX8 ? 800 : 512;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
||||||
|
|
@ -1274,7 +1274,7 @@ void radv_GetPhysicalDeviceProperties2(
|
||||||
|
|
||||||
/* SGPR. */
|
/* SGPR. */
|
||||||
properties->sgprsPerSimd =
|
properties->sgprsPerSimd =
|
||||||
ac_get_num_physical_sgprs(pdevice->rad_info.chip_class);
|
ac_get_num_physical_sgprs(&pdevice->rad_info);
|
||||||
properties->minSgprAllocation =
|
properties->minSgprAllocation =
|
||||||
pdevice->rad_info.chip_class >= GFX8 ? 16 : 8;
|
pdevice->rad_info.chip_class >= GFX8 ? 16 : 8;
|
||||||
properties->maxSgprAllocation =
|
properties->maxSgprAllocation =
|
||||||
|
|
|
||||||
|
|
@ -1249,7 +1249,7 @@ radv_get_max_waves(struct radv_device *device,
|
||||||
unsigned max_simd_waves;
|
unsigned max_simd_waves;
|
||||||
unsigned lds_per_wave = 0;
|
unsigned lds_per_wave = 0;
|
||||||
|
|
||||||
max_simd_waves = ac_get_max_simd_waves(device->physical_device->rad_info.family);
|
max_simd_waves = ac_get_max_wave64_per_simd(device->physical_device->rad_info.family);
|
||||||
|
|
||||||
if (stage == MESA_SHADER_FRAGMENT) {
|
if (stage == MESA_SHADER_FRAGMENT) {
|
||||||
lds_per_wave = conf->lds_size * lds_increment +
|
lds_per_wave = conf->lds_size * lds_increment +
|
||||||
|
|
@ -1265,7 +1265,8 @@ radv_get_max_waves(struct radv_device *device,
|
||||||
if (conf->num_sgprs)
|
if (conf->num_sgprs)
|
||||||
max_simd_waves =
|
max_simd_waves =
|
||||||
MIN2(max_simd_waves,
|
MIN2(max_simd_waves,
|
||||||
ac_get_num_physical_sgprs(chip_class) / conf->num_sgprs);
|
ac_get_num_physical_sgprs(&device->physical_device->rad_info) /
|
||||||
|
conf->num_sgprs);
|
||||||
|
|
||||||
if (conf->num_vgprs)
|
if (conf->num_vgprs)
|
||||||
max_simd_waves =
|
max_simd_waves =
|
||||||
|
|
@ -1362,7 +1363,7 @@ radv_GetShaderInfoAMD(VkDevice _device,
|
||||||
VkShaderStatisticsInfoAMD statistics = {};
|
VkShaderStatisticsInfoAMD statistics = {};
|
||||||
statistics.shaderStageMask = shaderStage;
|
statistics.shaderStageMask = shaderStage;
|
||||||
statistics.numPhysicalVgprs = RADV_NUM_PHYSICAL_VGPRS;
|
statistics.numPhysicalVgprs = RADV_NUM_PHYSICAL_VGPRS;
|
||||||
statistics.numPhysicalSgprs = ac_get_num_physical_sgprs(device->physical_device->rad_info.chip_class);
|
statistics.numPhysicalSgprs = ac_get_num_physical_sgprs(&device->physical_device->rad_info);
|
||||||
statistics.numAvailableSgprs = statistics.numPhysicalSgprs;
|
statistics.numAvailableSgprs = statistics.numPhysicalSgprs;
|
||||||
|
|
||||||
if (stage == MESA_SHADER_COMPUTE) {
|
if (stage == MESA_SHADER_COMPUTE) {
|
||||||
|
|
|
||||||
|
|
@ -5420,7 +5420,7 @@ static void si_calculate_max_simd_waves(struct si_shader *shader)
|
||||||
unsigned lds_per_wave = 0;
|
unsigned lds_per_wave = 0;
|
||||||
unsigned max_simd_waves;
|
unsigned max_simd_waves;
|
||||||
|
|
||||||
max_simd_waves = ac_get_max_simd_waves(sscreen->info.family);
|
max_simd_waves = ac_get_max_wave64_per_simd(sscreen->info.family);
|
||||||
|
|
||||||
/* Compute LDS usage for PS. */
|
/* Compute LDS usage for PS. */
|
||||||
switch (shader->selector->type) {
|
switch (shader->selector->type) {
|
||||||
|
|
@ -5454,16 +5454,25 @@ static void si_calculate_max_simd_waves(struct si_shader *shader)
|
||||||
if (conf->num_sgprs) {
|
if (conf->num_sgprs) {
|
||||||
max_simd_waves =
|
max_simd_waves =
|
||||||
MIN2(max_simd_waves,
|
MIN2(max_simd_waves,
|
||||||
ac_get_num_physical_sgprs(sscreen->info.chip_class) / conf->num_sgprs);
|
ac_get_num_physical_sgprs(&sscreen->info) / conf->num_sgprs);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (conf->num_vgprs)
|
if (conf->num_vgprs) {
|
||||||
max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
|
/* Always print wave limits as Wave64, so that we can compare
|
||||||
|
* Wave32 and Wave64 with shader-db fairly. */
|
||||||
|
unsigned max_vgprs = ac_get_num_physical_vgprs(sscreen->info.chip_class, 64);
|
||||||
|
max_simd_waves = MIN2(max_simd_waves, max_vgprs / conf->num_vgprs);
|
||||||
|
}
|
||||||
|
|
||||||
/* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage above
|
/* LDS is 64KB per CU (4 SIMDs) on GFX6-9, which is 16KB per SIMD (usage above
|
||||||
* 16KB makes some SIMDs unoccupied). */
|
* 16KB makes some SIMDs unoccupied).
|
||||||
|
*
|
||||||
|
* LDS is 128KB in WGP mode and 64KB in CU mode. Assume the WGP mode is used.
|
||||||
|
*/
|
||||||
|
unsigned max_lds_size = sscreen->info.chip_class >= GFX10 ? 128*1024 : 64*1024;
|
||||||
|
unsigned max_lds_per_simd = max_lds_size / 4;
|
||||||
if (lds_per_wave)
|
if (lds_per_wave)
|
||||||
max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
|
max_simd_waves = MIN2(max_simd_waves, max_lds_per_simd / lds_per_wave);
|
||||||
|
|
||||||
shader->info.max_simd_waves = max_simd_waves;
|
shader->info.max_simd_waves = max_simd_waves;
|
||||||
}
|
}
|
||||||
|
|
@ -7167,15 +7176,17 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
|
||||||
*/
|
*/
|
||||||
if (sel->type == PIPE_SHADER_COMPUTE) {
|
if (sel->type == PIPE_SHADER_COMPUTE) {
|
||||||
unsigned wave_size = sscreen->compute_wave_size;
|
unsigned wave_size = sscreen->compute_wave_size;
|
||||||
unsigned max_vgprs = 256;
|
unsigned max_vgprs = ac_get_num_physical_vgprs(sscreen->info.chip_class,
|
||||||
unsigned max_sgprs = sscreen->info.chip_class >= GFX8 ? 800 : 512;
|
wave_size);
|
||||||
|
unsigned max_sgprs = ac_get_num_physical_sgprs(&sscreen->info);
|
||||||
unsigned max_sgprs_per_wave = 128;
|
unsigned max_sgprs_per_wave = 128;
|
||||||
unsigned max_block_threads = si_get_max_workgroup_size(shader);
|
unsigned simds_per_tg = 4; /* assuming WGP mode on gfx10 */
|
||||||
unsigned min_waves_per_cu = DIV_ROUND_UP(max_block_threads, wave_size);
|
unsigned threads_per_tg = si_get_max_workgroup_size(shader);
|
||||||
unsigned min_waves_per_simd = DIV_ROUND_UP(min_waves_per_cu, 4);
|
unsigned waves_per_tg = DIV_ROUND_UP(threads_per_tg, wave_size);
|
||||||
|
unsigned waves_per_simd = DIV_ROUND_UP(waves_per_tg, simds_per_tg);
|
||||||
|
|
||||||
max_vgprs = max_vgprs / min_waves_per_simd;
|
max_vgprs = max_vgprs / waves_per_simd;
|
||||||
max_sgprs = MIN2(max_sgprs / min_waves_per_simd, max_sgprs_per_wave);
|
max_sgprs = MIN2(max_sgprs / waves_per_simd, max_sgprs_per_wave);
|
||||||
|
|
||||||
if (shader->config.num_sgprs > max_sgprs ||
|
if (shader->config.num_sgprs > max_sgprs ||
|
||||||
shader->config.num_vgprs > max_vgprs) {
|
shader->config.num_vgprs > max_vgprs) {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue