diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 1b370b39569..46ed5cbc61c 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -1528,6 +1528,11 @@ get_tied_defs(Instruction* instr) /* VADDR starts at 3. */ ops.push_back(3 + 4); ops.push_back(3 + 7); + } else if (instr->opcode == aco_opcode::s_bitset0_b32 || + instr->opcode == aco_opcode::s_bitset1_b32 || + instr->opcode == aco_opcode::s_bitset0_b64 || + instr->opcode == aco_opcode::s_bitset1_b64) { + ops.push_back(1); } return ops; } diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 985a36f475a..2c75ae656bf 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -1180,6 +1180,12 @@ alu_opt_info_is_valid(opt_ctx& ctx, alu_opt_info& info) lmask[2] = true; info.opcode = aco_opcode::s_fmaak_f32; } + } else if ((info.opcode == aco_opcode::s_bitset0_b32 || + info.opcode == aco_opcode::s_bitset1_b32 || + info.opcode == aco_opcode::s_bitset0_b64 || + info.opcode == aco_opcode::s_bitset1_b64) && + !smask[1]) { + return false; } if ((info.opcode == aco_opcode::s_fmac_f16 || info.opcode == aco_opcode::s_fmac_f32) &&