diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 7aea65142fd..cce524e705d 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -103,8 +103,8 @@ void init_program(Program *program, Stage stage, struct radv_shader_info *info, program->vgpr_alloc_granule = 4; if (chip_class >= GFX10) { - program->physical_sgprs = 2560; /* doesn't matter as long as it's at least 128 * 20 */ - program->physical_vgprs = 512; + program->physical_sgprs = 5120; /* doesn't matter as long as it's at least 128 * 40 */ + program->physical_vgprs = program->wave_size == 32 ? 1024 : 512; program->sgpr_alloc_granule = 128; program->sgpr_limit = 108; /* includes VCC, which can be treated as s[106-107] on GFX10+ */ if (chip_class >= GFX10_3) diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index 81782cbbe4a..5c9e1d1836b 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -313,9 +313,6 @@ uint16_t get_addr_vgpr_from_waves(Program *program, uint16_t waves) void calc_min_waves(Program* program) { unsigned waves_per_workgroup = calc_waves_per_workgroup(program); - /* currently min_waves is in wave64 waves */ - if (program->wave_size == 32) - waves_per_workgroup = DIV_ROUND_UP(waves_per_workgroup, 2); unsigned simd_per_cu = program->chip_class >= GFX10 ? 2 : 4; bool wgp = program->chip_class >= GFX10; /* assume WGP is used on Navi */ @@ -331,6 +328,9 @@ void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand) max_waves_per_simd = 16; else if (program->family >= CHIP_POLARIS10 && program->family <= CHIP_VEGAM) max_waves_per_simd = 8; + if (program->wave_size == 32) + max_waves_per_simd *= 2; + unsigned simd_per_cu = program->chip_class >= GFX10 ? 2 : 4; bool wgp = program->chip_class >= GFX10; /* assume WGP is used on Navi */