From a5b4ae67ae60d69418fc9cc879e5aa43ea5004e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 12 Jul 2024 17:45:59 -0400 Subject: [PATCH] ac: add radeon_info::has_scratch_base_registers Fixes: 3b0bfd254f7 - radeonsi/gfx11: make flat_scratch changes for compute Reviewed-by: Qiang Yu Part-of: --- src/amd/common/ac_gpu_info.c | 3 +++ src/amd/common/ac_gpu_info.h | 1 + src/gallium/drivers/radeonsi/si_compute.c | 6 ++---- src/gallium/drivers/radeonsi/si_shader.c | 3 +-- src/gallium/drivers/radeonsi/si_state_shaders.cpp | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 2c85c1af101..0d475cdab63 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -1597,6 +1597,8 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, const unsigned max_waves_per_tg = 32; /* 1024 threads in Wave32 */ info->max_scratch_waves = MAX2(32 * info->min_good_cu_per_sa * info->max_sa_per_se * info->num_se, max_waves_per_tg); + info->has_scratch_base_registers = info->gfx_level >= GFX11 || + (!info->has_graphics && info->family >= CHIP_GFX940); info->max_gflops = (info->gfx_level >= GFX11 ? 256 : 128) * info->num_cu * info->max_gpu_freq_mhz / 1000; info->memory_bandwidth_gbps = DIV_ROUND_UP(info->memory_freq_mhz_effective * info->memory_bus_width / 8, 1000); info->has_pcie_bandwidth_info = info->drm_minor >= 51; @@ -2035,6 +2037,7 @@ void ac_print_gpu_info(const struct radeon_info *info, FILE *f) fprintf(f, " max_vgpr_alloc = %i\n", info->max_vgpr_alloc); fprintf(f, " wave64_vgpr_alloc_granularity = %i\n", info->wave64_vgpr_alloc_granularity); fprintf(f, " max_scratch_waves = %i\n", info->max_scratch_waves); + fprintf(f, " has_scratch_base_registers = %i\n", info->has_scratch_base_registers); fprintf(f, "Ring info:\n"); fprintf(f, " attribute_ring_size_per_se = %u KB\n", DIV_ROUND_UP(info->attribute_ring_size_per_se, 1024)); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 218ddd13091..a6baed0dafd 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -270,6 +270,7 @@ struct radeon_info { uint32_t max_vgpr_alloc; uint32_t wave64_vgpr_alloc_granularity; uint32_t max_scratch_waves; + bool has_scratch_base_registers; /* Pos, prim, and attribute rings. */ uint32_t attribute_ring_size_per_se; /* GFX11+ */ diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 3bd423252bc..e6efa1bf691 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -406,7 +406,7 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx, struct si_s } /* Set the scratch address in the shader binary. */ - if (sctx->gfx_level < GFX11 && (sctx->family < CHIP_GFX940 || sctx->screen->info.has_graphics)) { + if (!sctx->screen->info.has_scratch_base_registers) { uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address; if (shader->scratch_va != scratch_va) { @@ -552,9 +552,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, struct si_compute radeon_opt_set_sh_reg(R_00B860_COMPUTE_TMPRING_SIZE, SI_TRACKED_COMPUTE_TMPRING_SIZE, sctx->compute_tmpring_size); - if (config->scratch_bytes_per_wave && - (sctx->gfx_level >= GFX11 || - (sctx->family >= CHIP_GFX940 && !sctx->screen->info.has_graphics))) { + if (config->scratch_bytes_per_wave && sctx->screen->info.has_scratch_base_registers) { radeon_opt_set_sh_reg2(R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO, SI_TRACKED_COMPUTE_DISPATCH_SCRATCH_BASE_LO, sctx->compute_scratch_buffer->gpu_address >> 8, diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index dc3d1c36315..d5a2eca5d4b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3014,8 +3014,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi } /* Add/remove the scratch offset to/from input SGPRs. */ - if (sel->screen->info.gfx_level < GFX11 && - (sel->screen->info.family < CHIP_GFX940 || sel->screen->info.has_graphics) && + if (!sel->screen->info.has_scratch_base_registers && !si_is_merged_shader(shader)) { if (sel->info.base.use_aco_amd) { /* When aco scratch_offset arg is added explicitly at the beginning. diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 8a4164b5211..108cce44489 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -4379,7 +4379,7 @@ bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes) return false; } - if (sctx->gfx_level < GFX11 && !si_update_scratch_relocs(sctx)) + if (!sctx->screen->info.has_scratch_base_registers && !si_update_scratch_relocs(sctx)) return false; }