From f520f4c299c2c976c14eeea8bd0691e709d5e6db Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 28 Jan 2021 11:07:26 +0000 Subject: [PATCH] aco: add Program::wgp_mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of assuming WGP mode on GFX10+ in different places, add a member to Program that can be used instead. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_insert_waitcnt.cpp | 4 ++-- src/amd/compiler/aco_ir.cpp | 2 ++ src/amd/compiler/aco_ir.h | 1 + src/amd/compiler/aco_live_var_analysis.cpp | 8 +++----- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index 30155e3ee7b..43c95ebb3eb 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -461,8 +461,8 @@ wait_imm perform_barrier(wait_ctx& ctx, memory_sync_info sync, unsigned semantic if (bar_scope_lds <= subgroup_scope) events &= ~event_lds; - /* in non-WGP, the L1/L0 cache keeps all memory operations in-order for the same workgroup */ - if (ctx.chip_class < GFX10 && sync.scope <= scope_workgroup) + /* in non-WGP, the L1 (L0 on GFX10+) cache keeps all memory operations in-order for the same workgroup */ + if (!ctx.program->wgp_mode && sync.scope <= scope_workgroup) events &= ~(event_vmem | event_vmem_store | event_smem); if (events) diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index cce524e705d..8aeca80c7dc 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -123,6 +123,8 @@ void init_program(Program *program, Stage stage, struct radv_shader_info *info, program->sgpr_limit = 104; } + program->wgp_mode = chip_class >= GFX10; /* assume WGP is used on Navi */ + program->next_fp_mode.preserve_signed_zero_inf_nan32 = false; program->next_fp_mode.preserve_signed_zero_inf_nan16_64 = false; program->next_fp_mode.must_flush_denorms32 = false; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index d769f2062db..056530c8326 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1827,6 +1827,7 @@ public: uint16_t sgpr_alloc_granule; uint16_t vgpr_alloc_granule; /* must be power of two */ unsigned workgroup_size; /* if known; otherwise UINT_MAX */ + bool wgp_mode; bool xnack_enabled = false; bool sram_ecc_enabled = false; diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index 5c9e1d1836b..8d5d75eff09 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -315,8 +315,7 @@ void calc_min_waves(Program* program) unsigned waves_per_workgroup = calc_waves_per_workgroup(program); unsigned simd_per_cu = program->chip_class >= GFX10 ? 2 : 4; - bool wgp = program->chip_class >= GFX10; /* assume WGP is used on Navi */ - unsigned simd_per_cu_wgp = wgp ? simd_per_cu * 2 : simd_per_cu; + unsigned simd_per_cu_wgp = program->wgp_mode ? simd_per_cu * 2 : simd_per_cu; program->min_waves = DIV_ROUND_UP(waves_per_workgroup, simd_per_cu_wgp); } @@ -333,9 +332,8 @@ void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand) unsigned simd_per_cu = program->chip_class >= GFX10 ? 2 : 4; - bool wgp = program->chip_class >= GFX10; /* assume WGP is used on Navi */ - unsigned simd_per_cu_wgp = wgp ? simd_per_cu * 2 : simd_per_cu; - unsigned lds_limit = wgp ? program->lds_limit * 2 : program->lds_limit; + unsigned simd_per_cu_wgp = program->wgp_mode ? simd_per_cu * 2 : simd_per_cu; + unsigned lds_limit = program->wgp_mode ? program->lds_limit * 2 : program->lds_limit; assert(program->min_waves >= 1); uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves);