mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 18:00:13 +01:00
ac/gpu_info: replace num_good_cu_per_sh with min/max_good_cu_per_sa
Perf counters use the new max number; all other users switch to the min.

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5184>
This commit is contained in:
parent
8c3fe285c9
commit
2cf46f2e3d
9 changed files with 19 additions and 15 deletions
|
|
@ -624,14 +624,15 @@ bool ac_query_gpu_info(int fd, void *dev_p,
|
||||||
util_bitcount(info->cu_mask[i][j]);
|
util_bitcount(info->cu_mask[i][j]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
info->num_good_cu_per_sh = info->num_good_compute_units /
|
|
||||||
(info->max_se * info->max_sh_per_se);
|
|
||||||
|
|
||||||
/* Round down to the nearest multiple of 2, because the hw can't
|
/* On GFX10, only whole WGPs (in units of 2 CUs) can be disabled,
|
||||||
* disable CUs. It can only disable whole WGPs (dual-CUs).
|
* and max - min <= 2.
|
||||||
*/
|
*/
|
||||||
if (info->chip_class >= GFX10)
|
unsigned cu_group = info->chip_class >= GFX10 ? 2 : 1;
|
||||||
info->num_good_cu_per_sh -= info->num_good_cu_per_sh % 2;
|
info->max_good_cu_per_sa = DIV_ROUND_UP(info->num_good_compute_units,
|
||||||
|
(info->max_se * info->max_sh_per_se * cu_group)) * cu_group;
|
||||||
|
info->min_good_cu_per_sa = (info->num_good_compute_units /
|
||||||
|
(info->max_se * info->max_sh_per_se * cu_group)) * cu_group;
|
||||||
|
|
||||||
memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode,
|
memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode,
|
||||||
sizeof(amdinfo->gb_tile_mode));
|
sizeof(amdinfo->gb_tile_mode));
|
||||||
|
|
@ -910,7 +911,8 @@ void ac_print_gpu_info(struct radeon_info *info)
|
||||||
printf("Shader core info:\n");
|
printf("Shader core info:\n");
|
||||||
printf(" max_shader_clock = %i\n", info->max_shader_clock);
|
printf(" max_shader_clock = %i\n", info->max_shader_clock);
|
||||||
printf(" num_good_compute_units = %i\n", info->num_good_compute_units);
|
printf(" num_good_compute_units = %i\n", info->num_good_compute_units);
|
||||||
printf(" num_good_cu_per_sh = %i\n", info->num_good_cu_per_sh);
|
printf(" max_good_cu_per_sa = %i\n", info->max_good_cu_per_sa);
|
||||||
|
printf(" min_good_cu_per_sa = %i\n", info->min_good_cu_per_sa);
|
||||||
printf(" max_se = %i\n", info->max_se);
|
printf(" max_se = %i\n", info->max_se);
|
||||||
printf(" max_sh_per_se = %i\n", info->max_sh_per_se);
|
printf(" max_sh_per_se = %i\n", info->max_sh_per_se);
|
||||||
printf(" max_wave64_per_simd = %i\n", info->max_wave64_per_simd);
|
printf(" max_wave64_per_simd = %i\n", info->max_wave64_per_simd);
|
||||||
|
|
|
||||||
|
|
@ -157,7 +157,8 @@ struct radeon_info {
|
||||||
uint32_t r600_max_quad_pipes; /* wave size / 16 */
|
uint32_t r600_max_quad_pipes; /* wave size / 16 */
|
||||||
uint32_t max_shader_clock;
|
uint32_t max_shader_clock;
|
||||||
uint32_t num_good_compute_units;
|
uint32_t num_good_compute_units;
|
||||||
uint32_t num_good_cu_per_sh;
|
uint32_t max_good_cu_per_sa;
|
||||||
|
uint32_t min_good_cu_per_sa; /* min != max if SAs have different # of CUs */
|
||||||
uint32_t max_se; /* shader engines */
|
uint32_t max_se; /* shader engines */
|
||||||
uint32_t max_sh_per_se; /* shader arrays per shader engine */
|
uint32_t max_sh_per_se; /* shader arrays per shader engine */
|
||||||
uint32_t max_wave64_per_simd;
|
uint32_t max_wave64_per_simd;
|
||||||
|
|
|
||||||
|
|
@ -1753,7 +1753,7 @@ void radv_GetPhysicalDeviceProperties2(
|
||||||
properties->shaderArraysPerEngineCount =
|
properties->shaderArraysPerEngineCount =
|
||||||
pdevice->rad_info.max_sh_per_se;
|
pdevice->rad_info.max_sh_per_se;
|
||||||
properties->computeUnitsPerShaderArray =
|
properties->computeUnitsPerShaderArray =
|
||||||
pdevice->rad_info.num_good_cu_per_sh;
|
pdevice->rad_info.min_good_cu_per_sa;
|
||||||
properties->simdPerComputeUnit =
|
properties->simdPerComputeUnit =
|
||||||
pdevice->rad_info.num_simd_per_compute_unit;
|
pdevice->rad_info.num_simd_per_compute_unit;
|
||||||
properties->wavefrontsPerSimd =
|
properties->wavefrontsPerSimd =
|
||||||
|
|
|
||||||
|
|
@ -358,7 +358,7 @@ radv_fill_sqtt_asic_info(struct radv_device *device,
|
||||||
chunk->vgprs_per_simd = rad_info->num_physical_wave64_vgprs_per_simd;
|
chunk->vgprs_per_simd = rad_info->num_physical_wave64_vgprs_per_simd;
|
||||||
chunk->sgprs_per_simd = rad_info->num_physical_sgprs_per_simd;
|
chunk->sgprs_per_simd = rad_info->num_physical_sgprs_per_simd;
|
||||||
chunk->shader_engines = rad_info->max_se;
|
chunk->shader_engines = rad_info->max_se;
|
||||||
chunk->compute_unit_per_shader_engine = rad_info->num_good_cu_per_sh;
|
chunk->compute_unit_per_shader_engine = rad_info->min_good_cu_per_sa;
|
||||||
chunk->simd_per_compute_unit = rad_info->num_simd_per_compute_unit;
|
chunk->simd_per_compute_unit = rad_info->num_simd_per_compute_unit;
|
||||||
chunk->wavefronts_per_simd = rad_info->max_wave64_per_simd;
|
chunk->wavefronts_per_simd = rad_info->max_wave64_per_simd;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -293,7 +293,7 @@ si_emit_graphics(struct radv_device *device,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Compute LATE_ALLOC_VS.LIMIT. */
|
/* Compute LATE_ALLOC_VS.LIMIT. */
|
||||||
unsigned num_cu_per_sh = physical_device->rad_info.num_good_cu_per_sh;
|
unsigned num_cu_per_sh = physical_device->rad_info.min_good_cu_per_sa;
|
||||||
unsigned late_alloc_wave64 = 0; /* The limit is per SH. */
|
unsigned late_alloc_wave64 = 0; /* The limit is per SH. */
|
||||||
unsigned late_alloc_wave64_gs = 0;
|
unsigned late_alloc_wave64_gs = 0;
|
||||||
unsigned cu_mask_vs = 0xffff;
|
unsigned cu_mask_vs = 0xffff;
|
||||||
|
|
|
||||||
|
|
@ -1256,7 +1256,7 @@ void si_init_perfcounters(struct si_screen *screen)
|
||||||
else if (!strcmp(block->b->b->name, "TA") ||
|
else if (!strcmp(block->b->b->name, "TA") ||
|
||||||
!strcmp(block->b->b->name, "TCP") ||
|
!strcmp(block->b->b->name, "TCP") ||
|
||||||
!strcmp(block->b->b->name, "TD")) {
|
!strcmp(block->b->b->name, "TD")) {
|
||||||
block->num_instances = MAX2(1, screen->info.num_good_cu_per_sh);
|
block->num_instances = MAX2(1, screen->info.max_good_cu_per_sa);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (si_pc_block_has_per_instance_groups(pc, block)) {
|
if (si_pc_block_has_per_instance_groups(pc, block)) {
|
||||||
|
|
|
||||||
|
|
@ -5269,7 +5269,7 @@ static void si_init_config(struct si_context *sctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Compute LATE_ALLOC_VS.LIMIT. */
|
/* Compute LATE_ALLOC_VS.LIMIT. */
|
||||||
unsigned num_cu_per_sh = sscreen->info.num_good_cu_per_sh;
|
unsigned num_cu_per_sh = sscreen->info.min_good_cu_per_sa;
|
||||||
unsigned late_alloc_wave64 = 0; /* The limit is per SH. */
|
unsigned late_alloc_wave64 = 0; /* The limit is per SH. */
|
||||||
unsigned cu_mask_vs = 0xffff;
|
unsigned cu_mask_vs = 0xffff;
|
||||||
unsigned cu_mask_gs = 0xffff;
|
unsigned cu_mask_gs = 0xffff;
|
||||||
|
|
|
||||||
|
|
@ -1143,7 +1143,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
||||||
S_00B22C_LDS_SIZE(shader->config.lds_size));
|
S_00B22C_LDS_SIZE(shader->config.lds_size));
|
||||||
|
|
||||||
/* Determine LATE_ALLOC_GS. */
|
/* Determine LATE_ALLOC_GS. */
|
||||||
unsigned num_cu_per_sh = sscreen->info.num_good_cu_per_sh;
|
unsigned num_cu_per_sh = sscreen->info.min_good_cu_per_sa;
|
||||||
unsigned late_alloc_wave64; /* The limit is per SH. */
|
unsigned late_alloc_wave64; /* The limit is per SH. */
|
||||||
|
|
||||||
/* For Wave32, the hw will launch twice the number of late
|
/* For Wave32, the hw will launch twice the number of late
|
||||||
|
|
|
||||||
|
|
@ -532,7 +532,8 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
|
||||||
radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL,
|
radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL,
|
||||||
&ws->info.max_sh_per_se);
|
&ws->info.max_sh_per_se);
|
||||||
if (ws->gen == DRV_SI) {
|
if (ws->gen == DRV_SI) {
|
||||||
ws->info.num_good_cu_per_sh = ws->info.num_good_compute_units /
|
ws->info.max_good_cu_per_sa =
|
||||||
|
ws->info.min_good_cu_per_sa = ws->info.num_good_compute_units /
|
||||||
(ws->info.max_se * ws->info.max_sh_per_se);
|
(ws->info.max_se * ws->info.max_sh_per_se);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue