diff --git a/.pick_status.json b/.pick_status.json index 764dcb44f37..47f99752770 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -598,7 +598,7 @@ "description": "aco: don't create sendmsg(dealloc_vgprs) if scratch is used", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "2930317cea53843b4f3f2b25f11fba5ba82fda16" }, diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index dde6e99ae21..5411db93835 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -1082,6 +1082,11 @@ dealloc_vgprs(Program* program) if (program->max_reg_demand.vgpr <= get_addr_vgpr_from_waves(program, max_waves)) return false; + /* sendmsg(dealloc_vgprs) releases scratch, so this isn't safe if there is an in-progress scratch + * store. */ + if (uses_scratch(program)) + return false; + Block& block = program->blocks.back(); /* don't bother checking if there is a pending VMEM store or export: there almost always is */ diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 5af840f180c..9cfa33e91d0 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -2298,6 +2298,8 @@ uint16_t get_vgpr_alloc(Program* program, uint16_t addressable_vgprs); uint16_t get_addr_sgpr_from_waves(Program* program, uint16_t max_waves); uint16_t get_addr_vgpr_from_waves(Program* program, uint16_t max_waves); +bool uses_scratch(Program* program); + typedef struct { const int16_t opcode_gfx7[static_cast(aco_opcode::num_opcodes)]; const int16_t opcode_gfx9[static_cast(aco_opcode::num_opcodes)]; diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index 91dfcebd059..40b4c0cf31d 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -298,15 +298,18 @@ calc_waves_per_workgroup(Program* program) } } /* end namespace */ +bool +uses_scratch(Program* program) +{ + /* Raytracing uses scratch, 
but we don't yet know how much. */ + return program->config->scratch_bytes_per_wave || program->stage == raytracing_cs; +} + uint16_t get_extra_sgprs(Program* program) { - /* We don't use this register on GFX6-8 and it's removed on GFX10+. RT uses scratch but we don't - * yet know how much. - */ - bool needs_flat_scr = - (program->config->scratch_bytes_per_wave || program->stage == raytracing_cs) && - program->gfx_level == GFX9; + /* We don't use this register on GFX6-8 and it's removed on GFX10+. */ + bool needs_flat_scr = uses_scratch(program) && program->gfx_level == GFX9; if (program->gfx_level >= GFX10) { assert(!program->dev.xnack_enabled);