r600/sfn: handle the IF predicate in the scheduler

This also involves emitting the CF instructions for the blocks in the
sfn assembler, and removing the special handling from the old backend
assembler.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37096>
This commit is contained in:
Gert Wollny 2025-08-29 00:03:13 +02:00 committed by Marge Bot
parent 359bfc3138
commit 713edb5998
5 changed files with 63 additions and 46 deletions

View file

@ -1248,7 +1248,6 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
const struct r600_bytecode_alu *alu, unsigned type)
{
struct r600_bytecode_alu *nalu = r600_bytecode_alu();
struct r600_bytecode_alu *lalu;
int i, r;
if (!nalu)
@ -1260,22 +1259,12 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
}
if (bc->cf_last != NULL && bc->cf_last->op != type) {
if (bc->cf_last != NULL && bc->cf_last->op != type && !bc->force_add_cf) {
/* check if we could add it anyway */
if ((bc->cf_last->op == CF_OP_ALU && type == CF_OP_ALU_PUSH_BEFORE) ||
(bc->cf_last->op == CF_OP_ALU_PUSH_BEFORE && type == CF_OP_ALU)) {
LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) {
if (lalu->execute_mask) {
assert(bc->force_add_cf || !"no force cf");
bc->force_add_cf = 1;
break;
}
if (bc->cf_last->op == CF_OP_ALU_PUSH_BEFORE && type == CF_OP_ALU)
type = CF_OP_ALU_PUSH_BEFORE;
}
} else {
assert(bc->force_add_cf ||!"no force cf");
bc->force_add_cf = 1;
}
else
assert(!"Try adding ALU with unsipported CF type to ALU_PUSH_BEFORE");
}
/* cf can contains only alu or only vtx or only tex */

View file

@ -361,15 +361,14 @@ AssamblerVisitor::emit_alu_op(const AluInstr& ai)
if (dst)
sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n";
/*
auto cf_op = ai.cf_type();
unsigned type = 0;
switch (cf_op) {
case cf_alu:
type = CF_OP_ALU;
break;
case cf_alu_push_before:
type = CF_OP_ALU_PUSH_BEFORE;
type = CF_OP_ALU;
break;
case cf_alu_pop_after:
type = CF_OP_ALU_POP_AFTER;
@ -392,11 +391,11 @@ AssamblerVisitor::emit_alu_op(const AluInstr& ai)
default:
assert(0 && "cf_alu_undefined should have been replaced");
}
*/
if (alu.last)
m_nliterals_in_group.clear();
m_result = !r600_bytecode_add_alu_type(m_bc, &alu, type);
m_result = !r600_bytecode_add_alu(m_bc, &alu);
if (unlikely(ai.opcode() == op1_mova_int)) {
if (m_bc->gfx_level < CAYMAN || alu.dst.sel == 0) {
@ -851,7 +850,9 @@ AssamblerVisitor::visit(const Block& block)
if (block.empty())
return;
if (block.has_instr_flag(Instr::force_cf)) {
if (block.cf_start())
block.cf_start()->accept(*this);
else if (block.has_instr_flag(Instr::force_cf)) {
m_bc->force_add_cf = 1;
m_bc->ar_loaded = 0;
m_last_addr = nullptr;
@ -874,15 +875,12 @@ AssamblerVisitor::visit(const Block& block)
void
AssamblerVisitor::visit(const IfInstr& instr)
{
emit_alu_push_before();
auto pred = instr.predicate();
auto [addr, dummy0, dummy1] = pred->indirect_addr();
assert(!dummy1);
assert(!addr);
pred->accept(*this);
r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);
clear_states(sf_all);
@ -892,6 +890,8 @@ AssamblerVisitor::visit(const IfInstr& instr)
void
AssamblerVisitor::visit(const ControlFlowInstr& instr)
{
sfn_log << SfnLog::assembly << "Translate " << instr << " ";
clear_states(sf_all);
switch (instr.cf_type()) {
case ControlFlowInstr::cf_else:
@ -926,6 +926,21 @@ AssamblerVisitor::visit(const ControlFlowInstr& instr)
m_result = false;
}
} break;
case ControlFlowInstr::cf_alu:
r600_bytecode_add_cfinst(m_bc, CF_OP_ALU);
break;
case ControlFlowInstr::cf_alu_push_before:
emit_alu_push_before();
break;
case ControlFlowInstr::cf_gds:
r600_bytecode_add_cfinst(m_bc, CF_OP_GDS);
break;
case ControlFlowInstr::cf_tex:
r600_bytecode_add_cfinst(m_bc, CF_OP_TEX);
break;
case ControlFlowInstr::cf_vtx:
r600_bytecode_add_cfinst(m_bc, CF_OP_VTX);
break;
default:
UNREACHABLE("Unknown CF instruction type");
}

View file

@ -374,7 +374,6 @@ LiveRangeInstrVisitor::visit(IfInstr *instr)
{
int b = m_block;
m_block = -1;
instr->predicate()->accept(*this);
scope_if();
m_block = b;
}

View file

@ -72,6 +72,7 @@ public:
{
assert(!m_cf_instr);
m_cf_instr = instr;
predicate = instr->predicate();
}
void visit(EmitVertexInstr *instr) override
@ -121,6 +122,8 @@ public:
std::list<Instr *> gds_instr;
std::list<Instr *> waitacks;
AluInstr *predicate{nullptr};
Instr *m_cf_instr{nullptr};
ValueFactory& m_value_factory;
@ -156,7 +159,8 @@ private:
bool collect_ready_type(std::list<T *>& ready, std::list<T *>& orig);
bool collect_ready_alu_vec(std::list<AluInstr *>& ready,
std::list<AluInstr *>& available);
std::list<AluInstr *>& available,
AluInstr **predicate);
bool schedule_tex(Shader::ShaderBlocks& out_blocks);
bool schedule_vtx(Shader::ShaderBlocks& out_blocks);
@ -500,10 +504,6 @@ BlockScheduler::schedule_block(Block& in_block,
assert(!fail);
if (cir.m_cf_instr) {
// Assert that if condition is ready
if (m_current_block->type() != Block::alu) {
start_new_block(out_blocks, Block::alu);
}
m_current_block->push_back(cir.m_cf_instr);
cir.m_cf_instr->set_scheduled();
}
@ -787,18 +787,13 @@ BlockScheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type ty
void BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks)
{
// TODO: needs fixing
if (m_current_block->remaining_slots() > 0) {
out_blocks.push_back(m_current_block);
return;
}
int used_slots = 0;
int pending_slots = 0;
Instr *next_block_start = nullptr;
for (auto cur_group : *m_current_block) {
/* This limit is a bit fishy, it should be 128 */
if (used_slots + pending_slots + cur_group->slots() < 128) {
if (cur_group->can_start_alu_block()) {
next_block_start = cur_group;
@ -843,6 +838,8 @@ void BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks)
if (group->has_lds_group_end())
sub_block->lds_group_end();
if (group->require_push())
sub_block->cf_start()->promote_alu_cf(ControlFlowInstr::cf_alu_push_before);
}
if (!sub_block->empty())
out_blocks.push_back(sub_block);
@ -1137,7 +1134,6 @@ BlockScheduler::collect_ready(CollectInstructions& available)
{
sfn_log << SfnLog::schedule << "Ready instructions\n";
bool result = false;
result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec);
result |= collect_ready_type(alu_trans_ready, available.alu_trans);
result |= collect_ready_type(alu_multi_slot_ready, available.alu_multi_slot);
result |= collect_ready_type(alu_groups_ready, available.alu_groups);
@ -1147,13 +1143,22 @@ BlockScheduler::collect_ready(CollectInstructions& available)
result |= collect_ready_type(free_ready, available.free_instr);
result |= collect_ready_type(waitacks_ready, available.waitacks);
if (!result && available.predicate && available.alu_groups.empty() &&
available.gds_instr.empty() && available.tex.empty() &&
available.fetches.empty() && available.free_instr.empty())
result |=
collect_ready_alu_vec(alu_vec_ready, available.alu_vec, &available.predicate);
else
result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec, nullptr);
sfn_log << SfnLog::schedule << "\n";
return result;
}
bool
BlockScheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready,
std::list<AluInstr *>& available)
std::list<AluInstr *>& available,
AluInstr **predicate)
{
auto i = available.begin();
auto e = available.end();
@ -1215,6 +1220,12 @@ BlockScheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready,
++i;
}
if (predicate && *predicate && available.empty() && ready.size() < 16 &&
(*predicate)->ready()) {
ready.push_back(*predicate);
*predicate = nullptr;
}
for (auto& i : ready)
sfn_log << SfnLog::schedule << "V: " << *i << "\n";

View file

@ -1860,7 +1860,7 @@ OUTPUT LOC:4 VARYING_SLOT:35 MASK:15
SYSVALUES R1.xyzw
ARRAYS A2[4].xy A2[4].zw
SHADER
BLOCK_START ALU
BLOCK_START ALU_PUSH_BEFORE
ALU_GROUP_BEGIN
ALU MOV A2[0].x : I[1.0] {W}
ALU MOV A2[0].y : L[0x3f8ccccd] {W}
@ -1908,10 +1908,12 @@ ALU_GROUP_BEGIN
ALU MULADD_IEEE S17.w : KC0[3].w R1.z@fully S15.w {WL}
ALU_GROUP_END
ALU_GROUP_BEGIN
ALU PRED_SETGE_INT __.x@chan : KC0[0].x L[0x4] {EP} PUSH_BEFORE
ALU MULADD_IEEE S19.z@group : KC0[4].z R1.w@fully S17.z {W}
ALU MULADD_IEEE S19.w@group : KC0[4].w R1.w@fully S17.w {WL}
ALU_GROUP_END
IF (( ALU PRED_SETGE_INT __.x@free : KC0[0].x L[0x4] {LEP} PUSH_BEFORE ))
IF (( ALU PRED_SETGE_INT __.x@chan : KC0[0].x L[0x4] {EP} PUSH_BEFORE ))
BLOCK_END
BLOCK_START ALU
ALU_GROUP_BEGIN
ALU ADD_INT S34.x : KC0[0].x L[0xfffffffc] {WL}
@ -2467,7 +2469,7 @@ ALU_GROUP_BEGIN
ALU_GROUP_END
LOOP_BEGIN
BLOCK_END
BLOCK_START ALU
BLOCK_START ALU_PUSH_BEFORE
ALU_GROUP_BEGIN
ALU RECIPSQRT_IEEE S3.x@chan : |R1.x@free| {W}
ALU RECIPSQRT_IEEE __.y@chgr : |R1.x@free| {}
@ -2476,12 +2478,13 @@ BLOCK_START ALU
ALU_GROUP_BEGIN
ALU SETGT_DX10 S4.x@chan : S3.x@chan S2.y@free {WL}
ALU_GROUP_END
IF (( ALU PRED_SETNE_INT __.x@free : S4.x@chan I[0] {LEP} PUSH_BEFORE ))
BLOCK_END
BLOCK_START ALU
ALU_GROUP_BEGIN
ALU PRED_SETNE_INT __.x@chan : S4.x@chan I[0] {LEP} PUSH_BEFORE
ALU_GROUP_END
IF (( ALU PRED_SETNE_INT __.x@chan : S4.x@chan I[0] {LEP} PUSH_BEFORE ))
BREAK
BLOCK_END
BLOCK_START ALU
BLOCK_START
ENDIF
BLOCK_END
BLOCK_START ALU