From d1ff9e951abc2fe7054fa070a24ce4cf823dad5b Mon Sep 17 00:00:00 2001
From: Natalie Vock
Date: Mon, 17 Feb 2025 18:42:47 +0100
Subject: [PATCH] aco: Fix RT VGPR limit on Navi31/32, GFX11.5, GFX12

Since 128 is not a multiple of the VGPR allocation granule, we will
actually allocate 134 VGPRs. No reason not to use the extra 6.

Part-of:
---
 src/amd/compiler/aco_ir.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index fd7da14aa44..f30f951510d 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -95,7 +95,7 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
    /* apparently gfx702 also has 16-bank LDS but I can't find a family for that */
    program->dev.has_16bank_lds = family == CHIP_KABINI || family == CHIP_STONEY;
 
-   program->dev.vgpr_limit = stage == raytracing_cs ? 128 : 256;
+   program->dev.vgpr_limit = 256;
    program->dev.physical_vgprs = 256;
    program->dev.vgpr_alloc_granule = 4;
 
@@ -128,6 +128,9 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
       program->dev.sgpr_limit = 104;
    }
 
+   if (program->stage == raytracing_cs)
+      program->dev.vgpr_limit = util_align_npot(128, program->dev.vgpr_alloc_granule);
+
    program->dev.scratch_alloc_granule = gfx_level >= GFX11 ? 256 : 1024;
 
    program->dev.max_waves_per_simd = 10;