amd: unify and tune the attribute ring size for gfx11

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21041>
This commit is contained in:
Marek Olšák 2023-01-25 04:37:54 -05:00 committed by Marge Bot
parent e25f08baf2
commit bfc37e7c63
5 changed files with 25 additions and 11 deletions

View file

@@ -1394,6 +1394,25 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info)
info->family == CHIP_NAVI22 ? 8 : 4);
}
+if (info->gfx_level >= GFX11) {
+switch (info->family) {
+case CHIP_GFX1103_R1:
+info->attribute_ring_size_per_se = 512 * 1024;
+break;
+case CHIP_GFX1103_R2:
+/* TODO: Test if 192 * 1024 is faster. */
+info->attribute_ring_size_per_se = 256 * 1024;
+break;
+default:
+info->attribute_ring_size_per_se = 1400 * 1024;
+break;
+}
+/* The size must be aligned to 64K per SE and must be at most 16M in total. */
+info->attribute_ring_size_per_se = align(info->attribute_ring_size_per_se, 64 * 1024);
+assert(info->attribute_ring_size_per_se * info->max_se <= 16 * 1024 * 1024);
+}
set_custom_cu_en_mask(info);
const char *ib_filename = debug_get_option("AMD_PARSE_IB", NULL);
@@ -1637,6 +1656,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f)
fprintf(f, " max_vgpr_alloc = %i\n", info->max_vgpr_alloc);
fprintf(f, " wave64_vgpr_alloc_granularity = %i\n", info->wave64_vgpr_alloc_granularity);
fprintf(f, " max_scratch_waves = %i\n", info->max_scratch_waves);
+fprintf(f, " attribute_ring_size_per_se = %u\n", info->attribute_ring_size_per_se);
fprintf(f, "Render backend info:\n");
fprintf(f, " pa_sc_tile_steering_override = 0x%x\n", info->pa_sc_tile_steering_override);

View file

@@ -225,6 +225,7 @@ struct radeon_info {
uint32_t max_vgpr_alloc;
uint32_t wave64_vgpr_alloc_granularity;
uint32_t max_scratch_waves;
+uint32_t attribute_ring_size_per_se;
/* Render backends (color + depth blocks). */
uint32_t r300_num_gb_pipes;

View file

@@ -5541,11 +5541,8 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device
: 0;
if (device->physical_device->rad_info.gfx_level >= GFX11) {
-/* TODO: tweak this */
-unsigned attr_ring_size_per_se = align(1400000, 64 * 1024);
-unsigned attr_ring_size = attr_ring_size_per_se * device->physical_device->rad_info.max_se;
-assert(attr_ring_size <= 16 * 1024 * 1024); /* maximum size */
-needs.attr_ring_size = attr_ring_size;
+needs.attr_ring_size = device->physical_device->rad_info.attribute_ring_size_per_se *
+device->physical_device->rad_info.max_se;
}
/* Return early if we already match these needs.

View file

@@ -1426,10 +1426,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
sscreen->ngg_subgroup_size = 128;
if (sscreen->info.gfx_level >= GFX11) {
-/* TODO: tweak this */
-unsigned attr_ring_size_per_se = align(1400000, 64 * 1024);
-unsigned attr_ring_size = attr_ring_size_per_se * sscreen->info.max_se;
-assert(attr_ring_size <= 16 * 1024 * 1024); /* maximum size */
+unsigned attr_ring_size = sscreen->info.attribute_ring_size_per_se * sscreen->info.max_se;
sscreen->attribute_ring = si_aligned_buffer_create(&sscreen->b,
PIPE_RESOURCE_FLAG_UNMAPPABLE |
SI_RESOURCE_FLAG_32BIT |

View file

@@ -5967,8 +5967,7 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
si_pm4_set_reg(pm4, R_031118_SPI_ATTRIBUTE_RING_BASE,
sscreen->attribute_ring->gpu_address >> 16);
si_pm4_set_reg(pm4, R_03111C_SPI_ATTRIBUTE_RING_SIZE,
-S_03111C_MEM_SIZE(((sscreen->attribute_ring->bo_size /
-sscreen->info.max_se) >> 16) - 1) |
+S_03111C_MEM_SIZE((sscreen->info.attribute_ring_size_per_se >> 16) - 1) |
S_03111C_BIG_PAGE(sscreen->info.discardable_allows_big_page) |
S_03111C_L1_POLICY(1));
}