diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index 30155e3ee7b..43c95ebb3eb 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -461,8 +461,8 @@ wait_imm perform_barrier(wait_ctx& ctx, memory_sync_info sync, unsigned semantic if (bar_scope_lds <= subgroup_scope) events &= ~event_lds; - /* in non-WGP, the L1/L0 cache keeps all memory operations in-order for the same workgroup */ - if (ctx.chip_class < GFX10 && sync.scope <= scope_workgroup) + /* in non-WGP, the L1 (L0 on GFX10+) cache keeps all memory operations in-order for the same workgroup */ + if (!ctx.program->wgp_mode && sync.scope <= scope_workgroup) events &= ~(event_vmem | event_vmem_store | event_smem); if (events) diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index cce524e705d..8aeca80c7dc 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -123,6 +123,8 @@ void init_program(Program *program, Stage stage, struct radv_shader_info *info, program->sgpr_limit = 104; } + program->wgp_mode = chip_class >= GFX10; /* assume WGP is used on Navi */ + program->next_fp_mode.preserve_signed_zero_inf_nan32 = false; program->next_fp_mode.preserve_signed_zero_inf_nan16_64 = false; program->next_fp_mode.must_flush_denorms32 = false; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index d769f2062db..056530c8326 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1827,6 +1827,7 @@ public: uint16_t sgpr_alloc_granule; uint16_t vgpr_alloc_granule; /* must be power of two */ unsigned workgroup_size; /* if known; otherwise UINT_MAX */ + bool wgp_mode; bool xnack_enabled = false; bool sram_ecc_enabled = false; diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index 5c9e1d1836b..8d5d75eff09 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -315,8 +315,7 @@ void calc_min_waves(Program* program) unsigned waves_per_workgroup = calc_waves_per_workgroup(program); unsigned simd_per_cu = program->chip_class >= GFX10 ? 2 : 4; - bool wgp = program->chip_class >= GFX10; /* assume WGP is used on Navi */ - unsigned simd_per_cu_wgp = wgp ? simd_per_cu * 2 : simd_per_cu; + unsigned simd_per_cu_wgp = program->wgp_mode ? simd_per_cu * 2 : simd_per_cu; program->min_waves = DIV_ROUND_UP(waves_per_workgroup, simd_per_cu_wgp); } @@ -333,9 +332,8 @@ void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand) unsigned simd_per_cu = program->chip_class >= GFX10 ? 2 : 4; - bool wgp = program->chip_class >= GFX10; /* assume WGP is used on Navi */ - unsigned simd_per_cu_wgp = wgp ? simd_per_cu * 2 : simd_per_cu; - unsigned lds_limit = wgp ? program->lds_limit * 2 : program->lds_limit; + unsigned simd_per_cu_wgp = program->wgp_mode ? simd_per_cu * 2 : simd_per_cu; + unsigned lds_limit = program->wgp_mode ? program->lds_limit * 2 : program->lds_limit; assert(program->min_waves >= 1); uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves);