diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp index 859a3cd04d1..1a04dd1582f 100644 --- a/src/amd/compiler/aco_insert_exec_mask.cpp +++ b/src/amd/compiler/aco_insert_exec_mask.cpp @@ -103,7 +103,8 @@ needs_exact(aco_ptr& instr) * epilog without considering the exec mask. */ return instr->isEXP() || instr->opcode == aco_opcode::p_jump_to_epilog || - instr->opcode == aco_opcode::p_dual_src_export_gfx11; + instr->opcode == aco_opcode::p_dual_src_export_gfx11 || + instr->opcode == aco_opcode::p_end_with_regs; } } diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py index 04b2645b61a..c6766934aa4 100644 --- a/src/amd/compiler/aco_opcodes.py +++ b/src/amd/compiler/aco_opcodes.py @@ -375,6 +375,10 @@ opcode("p_interp_gfx11") # performs dual source MRTs swizzling and emits exports on GFX11 opcode("p_dual_src_export_gfx11") +# Let the shader end with specific registers set to the wanted values. Used by +# multi-part shaders to pass arguments to the next part. 
+opcode("p_end_with_regs") + # SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc) SOP2 = { # GFX6, GFX7, GFX8, GFX9, GFX10,GFX11,name diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp index d0cd09db362..592e42c54c4 100644 --- a/src/amd/compiler/aco_scheduler.cpp +++ b/src/amd/compiler/aco_scheduler.cpp @@ -611,7 +611,8 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards) instr->opcode == aco_opcode::p_init_scratch || instr->opcode == aco_opcode::p_jump_to_epilog || instr->opcode == aco_opcode::s_sendmsg_rtn_b32 || - instr->opcode == aco_opcode::s_sendmsg_rtn_b64) + instr->opcode == aco_opcode::s_sendmsg_rtn_b64 || + instr->opcode == aco_opcode::p_end_with_regs) return hazard_fail_unreorderable; memory_event_set instr_set; diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index 506ff8039a6..781a25538af 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -266,6 +266,7 @@ validate_ir(Program* program) instr->opcode == aco_opcode::p_create_vector || instr->opcode == aco_opcode::p_jump_to_epilog || instr->opcode == aco_opcode::p_dual_src_export_gfx11 || + instr->opcode == aco_opcode::p_end_with_regs || (instr->opcode == aco_opcode::p_interp_gfx11 && i == 0) || (instr->opcode == aco_opcode::p_bpermute_gfx11w64 && i == 0) || (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||