diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index eb7d7ad7c8d..b9f3499d4ca 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -1394,6 +1394,25 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info) info->family == CHIP_NAVI22 ? 8 : 4); } + if (info->gfx_level >= GFX11) { + switch (info->family) { + case CHIP_GFX1103_R1: + info->attribute_ring_size_per_se = 512 * 1024; + break; + case CHIP_GFX1103_R2: + /* TODO: Test if 192 * 1024 is faster. */ + info->attribute_ring_size_per_se = 256 * 1024; + break; + default: + info->attribute_ring_size_per_se = 1400 * 1024; + break; + } + + /* The size must be aligned to 64K per SE and must be at most 16M in total. */ + info->attribute_ring_size_per_se = align(info->attribute_ring_size_per_se, 64 * 1024); + assert(info->attribute_ring_size_per_se * info->max_se <= 16 * 1024 * 1024); + } + set_custom_cu_en_mask(info); const char *ib_filename = debug_get_option("AMD_PARSE_IB", NULL); @@ -1637,6 +1656,7 @@ void ac_print_gpu_info(struct radeon_info *info, FILE *f) fprintf(f, " max_vgpr_alloc = %i\n", info->max_vgpr_alloc); fprintf(f, " wave64_vgpr_alloc_granularity = %i\n", info->wave64_vgpr_alloc_granularity); fprintf(f, " max_scratch_waves = %i\n", info->max_scratch_waves); + fprintf(f, " attribute_ring_size_per_se = %u\n", info->attribute_ring_size_per_se); fprintf(f, "Render backend info:\n"); fprintf(f, " pa_sc_tile_steering_override = 0x%x\n", info->pa_sc_tile_steering_override); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 76aa6b27e9d..4b72c4b4625 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -225,6 +225,7 @@ struct radeon_info { uint32_t max_vgpr_alloc; uint32_t wave64_vgpr_alloc_granularity; uint32_t max_scratch_waves; + uint32_t attribute_ring_size_per_se; /* Render backends (color + depth blocks). */ uint32_t r300_num_gb_pipes; diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index c6a18f96148..a7b3d411188 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -5541,11 +5541,8 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device : 0; if (device->physical_device->rad_info.gfx_level >= GFX11) { - /* TODO: tweak this */ - unsigned attr_ring_size_per_se = align(1400000, 64 * 1024); - unsigned attr_ring_size = attr_ring_size_per_se * device->physical_device->rad_info.max_se; - assert(attr_ring_size <= 16 * 1024 * 1024); /* maximum size */ - needs.attr_ring_size = attr_ring_size; + needs.attr_ring_size = device->physical_device->rad_info.attribute_ring_size_per_se * + device->physical_device->rad_info.max_se; } /* Return early if we already match these needs. diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 5f1f312ba36..ea1f00ebc8d 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1426,10 +1426,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, sscreen->ngg_subgroup_size = 128; if (sscreen->info.gfx_level >= GFX11) { - /* TODO: tweak this */ - unsigned attr_ring_size_per_se = align(1400000, 64 * 1024); - unsigned attr_ring_size = attr_ring_size_per_se * sscreen->info.max_se; - assert(attr_ring_size <= 16 * 1024 * 1024); /* maximum size */ + unsigned attr_ring_size = sscreen->info.attribute_ring_size_per_se * sscreen->info.max_se; sscreen->attribute_ring = si_aligned_buffer_create(&sscreen->b, PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_32BIT | diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index e23ec055e6c..ae6f0afa002 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -5967,8 +5967,7 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) si_pm4_set_reg(pm4, R_031118_SPI_ATTRIBUTE_RING_BASE, sscreen->attribute_ring->gpu_address >> 16); si_pm4_set_reg(pm4, R_03111C_SPI_ATTRIBUTE_RING_SIZE, - S_03111C_MEM_SIZE(((sscreen->attribute_ring->bo_size / - sscreen->info.max_se) >> 16) - 1) | + S_03111C_MEM_SIZE((sscreen->info.attribute_ring_size_per_se >> 16) - 1) | S_03111C_BIG_PAGE(sscreen->info.discardable_allows_big_page) | S_03111C_L1_POLICY(1)); }