aco/ra: create s_bitset

fossil-db (navi31):
Totals from 33092 (16.35% of 202426) affected shaders:
Instrs: 16722717 -> 16696250 (-0.16%); split: -0.16%, +0.00%
CodeSize: 90003664 -> 89779940 (-0.25%); split: -0.25%, +0.00%
Latency: 123990480 -> 123934891 (-0.04%); split: -0.05%, +0.00%
InvThroughput: 20972033 -> 20971140 (-0.00%); split: -0.01%, +0.00%

fossil-db (navi21):
Totals from 6776 (3.35% of 202427) affected shaders:
Instrs: 11167123 -> 11166438 (-0.01%)
CodeSize: 62605436 -> 62573220 (-0.05%); split: -0.05%, +0.00%
Latency: 238610061 -> 238603224 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 55148639 -> 55148624 (-0.00%); split: -0.00%, +0.00%
Copies: 1211216 -> 1210612 (-0.05%); split: -0.05%, +0.00%
SALU: 1436679 -> 1435997 (-0.05%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40889>
This commit is contained in:
Rhys Perry 2026-04-10 16:46:14 +01:00 committed by Marge Bot
parent 88dcda1078
commit 984aa6e085

View file

@ -3222,6 +3222,29 @@ sop2_can_use_sopk(ra_ctx& ctx, Instruction* instr)
return true;
}
bool
sop2_can_use_bitset(ra_ctx& ctx, Instruction* instr)
{
if (instr->opcode != aco_opcode::s_and_b32 && instr->opcode != aco_opcode::s_and_b64 &&
instr->opcode != aco_opcode::s_or_b32 && instr->opcode != aco_opcode::s_or_b64)
return false;
uint32_t const_idx = instr->operands[0].isConstant() ? 0 : 1;
if (!instr->definitions[1].isKill() || !instr->operands[const_idx].isConstant() ||
!instr->operands[!const_idx].isTemp() || !instr->operands[!const_idx].isKillBeforeDef())
return false;
uint64_t val = instr->operands[const_idx].constantValue64();
switch (instr->opcode) {
case aco_opcode::s_and_b32: return util_bitcount(val) == 31;
case aco_opcode::s_and_b64: return util_bitcount64(val) == 63;
case aco_opcode::s_or_b32: return util_bitcount(val) == 1;
case aco_opcode::s_or_b64: return util_bitcount64(val) == 1;
default: return false;
}
}
void
create_phi_vector_affinities(ra_ctx& ctx, aco_ptr<Instruction>& instr,
std::map<Operand*, std::vector<vector_info>>& vector_phis)
@ -3370,6 +3393,8 @@ get_affinities(ra_ctx& ctx)
op = instr->operands[2];
} else if (i == 0 && sop2_can_use_sopk(ctx, instr.get())) {
op = instr->operands[instr->operands[0].isLiteral()];
} else if (i == 0 && sop2_can_use_bitset(ctx, instr.get())) {
op = instr->operands[instr->operands[0].isConstant()];
} else {
continue;
}
@ -3583,34 +3608,56 @@ optimize_encoding_vop2(ra_ctx& ctx, RegisterFile& register_file, aco_ptr<Instruc
}
void
optimize_encoding_sopk(ra_ctx& ctx, RegisterFile& register_file, aco_ptr<Instruction>& instr)
optimize_encoding_sopk_sop1(ra_ctx& ctx, RegisterFile& register_file, aco_ptr<Instruction>& instr)
{
/* try to optimize sop2 with literal source to sopk */
if (!sop2_can_use_sopk(ctx, instr.get()))
/* try to optimize sop2 with literal source to sopk, or s_and/s_or to s_bitset */
bool sopk = sop2_can_use_sopk(ctx, instr.get());
bool bitset = sop2_can_use_bitset(ctx, instr.get());
if (!sopk && !bitset)
return;
unsigned literal_idx = instr->operands[1].isLiteral();
unsigned const_idx = instr->operands[1].isConstant();
PhysReg op_reg = instr->operands[!literal_idx].physReg();
PhysReg op_reg = instr->operands[!const_idx].physReg();
if (!is_sgpr_writable_without_side_effects(ctx.program->gfx_level, op_reg))
return;
if (affinity_blocks_tied_def0(ctx, register_file, instr.get(), !literal_idx))
if (affinity_blocks_tied_def0(ctx, register_file, instr.get(), !const_idx))
return;
instr->format = Format::SOPK;
instr->salu().imm = instr->operands[literal_idx].constantValue() & 0xffff;
if (literal_idx == 0)
std::swap(instr->operands[0], instr->operands[1]);
if (instr->operands.size() > 2)
std::swap(instr->operands[1], instr->operands[2]);
instr->operands.pop_back();
if (sopk) {
instr->format = Format::SOPK;
instr->salu().imm = instr->operands[const_idx].constantValue() & 0xffff;
if (const_idx == 0)
std::swap(instr->operands[0], instr->operands[1]);
if (instr->operands.size() > 2)
std::swap(instr->operands[1], instr->operands[2]);
instr->operands.pop_back();
switch (instr->opcode) {
case aco_opcode::s_add_u32:
case aco_opcode::s_add_i32: instr->opcode = aco_opcode::s_addk_i32; break;
case aco_opcode::s_mul_i32: instr->opcode = aco_opcode::s_mulk_i32; break;
case aco_opcode::s_cselect_b32: instr->opcode = aco_opcode::s_cmovk_i32; break;
default: UNREACHABLE("illegal instruction");
switch (instr->opcode) {
case aco_opcode::s_add_u32:
case aco_opcode::s_add_i32: instr->opcode = aco_opcode::s_addk_i32; break;
case aco_opcode::s_mul_i32: instr->opcode = aco_opcode::s_mulk_i32; break;
case aco_opcode::s_cselect_b32: instr->opcode = aco_opcode::s_cmovk_i32; break;
default: UNREACHABLE("illegal instruction");
}
} else {
instr->format = Format::SOP1;
if (const_idx == 1)
std::swap(instr->operands[0], instr->operands[1]);
instr->definitions.pop_back();
switch (instr->opcode) {
case aco_opcode::s_and_b32: instr->opcode = aco_opcode::s_bitset0_b32; break;
case aco_opcode::s_and_b64: instr->opcode = aco_opcode::s_bitset0_b64; break;
case aco_opcode::s_or_b32: instr->opcode = aco_opcode::s_bitset1_b32; break;
case aco_opcode::s_or_b64: instr->opcode = aco_opcode::s_bitset1_b64; break;
default: UNREACHABLE("illegal instruction");
}
uint64_t val = instr->operands[0].constantValue64();
if (instr->opcode == aco_opcode::s_bitset0_b32 || instr->opcode == aco_opcode::s_bitset0_b64)
val = ~val;
instr->operands[0] = Operand::c32(ffsll(val) - 1);
}
}
@ -3620,7 +3667,7 @@ optimize_encoding(ra_ctx& ctx, RegisterFile& register_file, aco_ptr<Instruction>
if (instr->isVALU())
optimize_encoding_vop2(ctx, register_file, instr);
if (instr->isSALU())
optimize_encoding_sopk(ctx, register_file, instr);
optimize_encoding_sopk_sop1(ctx, register_file, instr);
}
void