r600/sfn: handle the IF predicate in the scheduler

This also involves emitting the CF for the blocks in the
sfn-assembler, and we have to remove the special handling in the
old backend assembler.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37096>
This commit is contained in:
Gert Wollny 2025-08-29 00:03:13 +02:00 committed by Marge Bot
parent 359bfc3138
commit 713edb5998
5 changed files with 63 additions and 46 deletions

View file

@ -1248,7 +1248,6 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
const struct r600_bytecode_alu *alu, unsigned type) const struct r600_bytecode_alu *alu, unsigned type)
{ {
struct r600_bytecode_alu *nalu = r600_bytecode_alu(); struct r600_bytecode_alu *nalu = r600_bytecode_alu();
struct r600_bytecode_alu *lalu;
int i, r; int i, r;
if (!nalu) if (!nalu)
@ -1260,22 +1259,12 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs); assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
} }
if (bc->cf_last != NULL && bc->cf_last->op != type) { if (bc->cf_last != NULL && bc->cf_last->op != type && !bc->force_add_cf) {
/* check if we could add it anyway */ /* check if we could add it anyway */
if ((bc->cf_last->op == CF_OP_ALU && type == CF_OP_ALU_PUSH_BEFORE) || if (bc->cf_last->op == CF_OP_ALU_PUSH_BEFORE && type == CF_OP_ALU)
(bc->cf_last->op == CF_OP_ALU_PUSH_BEFORE && type == CF_OP_ALU)) {
LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) {
if (lalu->execute_mask) {
assert(bc->force_add_cf || !"no force cf");
bc->force_add_cf = 1;
break;
}
type = CF_OP_ALU_PUSH_BEFORE; type = CF_OP_ALU_PUSH_BEFORE;
} else
} else { assert(!"Try adding ALU with unsipported CF type to ALU_PUSH_BEFORE");
assert(bc->force_add_cf ||!"no force cf");
bc->force_add_cf = 1;
}
} }
/* cf can contains only alu or only vtx or only tex */ /* cf can contains only alu or only vtx or only tex */

View file

@ -361,15 +361,14 @@ AssamblerVisitor::emit_alu_op(const AluInstr& ai)
if (dst) if (dst)
sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n"; sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n";
/*
auto cf_op = ai.cf_type(); auto cf_op = ai.cf_type();
unsigned type = 0; unsigned type = 0;
switch (cf_op) { switch (cf_op) {
case cf_alu: case cf_alu:
type = CF_OP_ALU;
break;
case cf_alu_push_before: case cf_alu_push_before:
type = CF_OP_ALU_PUSH_BEFORE; type = CF_OP_ALU;
break; break;
case cf_alu_pop_after: case cf_alu_pop_after:
type = CF_OP_ALU_POP_AFTER; type = CF_OP_ALU_POP_AFTER;
@ -392,11 +391,11 @@ AssamblerVisitor::emit_alu_op(const AluInstr& ai)
default: default:
assert(0 && "cf_alu_undefined should have been replaced"); assert(0 && "cf_alu_undefined should have been replaced");
} }
*/
if (alu.last) if (alu.last)
m_nliterals_in_group.clear(); m_nliterals_in_group.clear();
m_result = !r600_bytecode_add_alu_type(m_bc, &alu, type); m_result = !r600_bytecode_add_alu(m_bc, &alu);
if (unlikely(ai.opcode() == op1_mova_int)) { if (unlikely(ai.opcode() == op1_mova_int)) {
if (m_bc->gfx_level < CAYMAN || alu.dst.sel == 0) { if (m_bc->gfx_level < CAYMAN || alu.dst.sel == 0) {
@ -851,7 +850,9 @@ AssamblerVisitor::visit(const Block& block)
if (block.empty()) if (block.empty())
return; return;
if (block.has_instr_flag(Instr::force_cf)) { if (block.cf_start())
block.cf_start()->accept(*this);
else if (block.has_instr_flag(Instr::force_cf)) {
m_bc->force_add_cf = 1; m_bc->force_add_cf = 1;
m_bc->ar_loaded = 0; m_bc->ar_loaded = 0;
m_last_addr = nullptr; m_last_addr = nullptr;
@ -874,15 +875,12 @@ AssamblerVisitor::visit(const Block& block)
void void
AssamblerVisitor::visit(const IfInstr& instr) AssamblerVisitor::visit(const IfInstr& instr)
{ {
emit_alu_push_before();
auto pred = instr.predicate(); auto pred = instr.predicate();
auto [addr, dummy0, dummy1] = pred->indirect_addr(); auto [addr, dummy0, dummy1] = pred->indirect_addr();
assert(!dummy1); assert(!dummy1);
assert(!addr); assert(!addr);
pred->accept(*this);
r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP); r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);
clear_states(sf_all); clear_states(sf_all);
@ -892,6 +890,8 @@ AssamblerVisitor::visit(const IfInstr& instr)
void void
AssamblerVisitor::visit(const ControlFlowInstr& instr) AssamblerVisitor::visit(const ControlFlowInstr& instr)
{ {
sfn_log << SfnLog::assembly << "Translate " << instr << " ";
clear_states(sf_all); clear_states(sf_all);
switch (instr.cf_type()) { switch (instr.cf_type()) {
case ControlFlowInstr::cf_else: case ControlFlowInstr::cf_else:
@ -926,6 +926,21 @@ AssamblerVisitor::visit(const ControlFlowInstr& instr)
m_result = false; m_result = false;
} }
} break; } break;
case ControlFlowInstr::cf_alu:
r600_bytecode_add_cfinst(m_bc, CF_OP_ALU);
break;
case ControlFlowInstr::cf_alu_push_before:
emit_alu_push_before();
break;
case ControlFlowInstr::cf_gds:
r600_bytecode_add_cfinst(m_bc, CF_OP_GDS);
break;
case ControlFlowInstr::cf_tex:
r600_bytecode_add_cfinst(m_bc, CF_OP_TEX);
break;
case ControlFlowInstr::cf_vtx:
r600_bytecode_add_cfinst(m_bc, CF_OP_VTX);
break;
default: default:
UNREACHABLE("Unknown CF instruction type"); UNREACHABLE("Unknown CF instruction type");
} }

View file

@ -374,7 +374,6 @@ LiveRangeInstrVisitor::visit(IfInstr *instr)
{ {
int b = m_block; int b = m_block;
m_block = -1; m_block = -1;
instr->predicate()->accept(*this);
scope_if(); scope_if();
m_block = b; m_block = b;
} }

View file

@ -72,6 +72,7 @@ public:
{ {
assert(!m_cf_instr); assert(!m_cf_instr);
m_cf_instr = instr; m_cf_instr = instr;
predicate = instr->predicate();
} }
void visit(EmitVertexInstr *instr) override void visit(EmitVertexInstr *instr) override
@ -121,6 +122,8 @@ public:
std::list<Instr *> gds_instr; std::list<Instr *> gds_instr;
std::list<Instr *> waitacks; std::list<Instr *> waitacks;
AluInstr *predicate{nullptr};
Instr *m_cf_instr{nullptr}; Instr *m_cf_instr{nullptr};
ValueFactory& m_value_factory; ValueFactory& m_value_factory;
@ -156,7 +159,8 @@ private:
bool collect_ready_type(std::list<T *>& ready, std::list<T *>& orig); bool collect_ready_type(std::list<T *>& ready, std::list<T *>& orig);
bool collect_ready_alu_vec(std::list<AluInstr *>& ready, bool collect_ready_alu_vec(std::list<AluInstr *>& ready,
std::list<AluInstr *>& available); std::list<AluInstr *>& available,
AluInstr **predicate);
bool schedule_tex(Shader::ShaderBlocks& out_blocks); bool schedule_tex(Shader::ShaderBlocks& out_blocks);
bool schedule_vtx(Shader::ShaderBlocks& out_blocks); bool schedule_vtx(Shader::ShaderBlocks& out_blocks);
@ -500,10 +504,6 @@ BlockScheduler::schedule_block(Block& in_block,
assert(!fail); assert(!fail);
if (cir.m_cf_instr) { if (cir.m_cf_instr) {
// Assert that if condition is ready
if (m_current_block->type() != Block::alu) {
start_new_block(out_blocks, Block::alu);
}
m_current_block->push_back(cir.m_cf_instr); m_current_block->push_back(cir.m_cf_instr);
cir.m_cf_instr->set_scheduled(); cir.m_cf_instr->set_scheduled();
} }
@ -787,18 +787,13 @@ BlockScheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type ty
void BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks) void BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks)
{ {
// TODO: needs fixing
if (m_current_block->remaining_slots() > 0) {
out_blocks.push_back(m_current_block);
return;
}
int used_slots = 0; int used_slots = 0;
int pending_slots = 0; int pending_slots = 0;
Instr *next_block_start = nullptr; Instr *next_block_start = nullptr;
for (auto cur_group : *m_current_block) { for (auto cur_group : *m_current_block) {
/* This limit is a bit fishy, it should be 128 */
if (used_slots + pending_slots + cur_group->slots() < 128) { if (used_slots + pending_slots + cur_group->slots() < 128) {
if (cur_group->can_start_alu_block()) { if (cur_group->can_start_alu_block()) {
next_block_start = cur_group; next_block_start = cur_group;
@ -843,6 +838,8 @@ void BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks)
if (group->has_lds_group_end()) if (group->has_lds_group_end())
sub_block->lds_group_end(); sub_block->lds_group_end();
if (group->require_push())
sub_block->cf_start()->promote_alu_cf(ControlFlowInstr::cf_alu_push_before);
} }
if (!sub_block->empty()) if (!sub_block->empty())
out_blocks.push_back(sub_block); out_blocks.push_back(sub_block);
@ -1137,7 +1134,6 @@ BlockScheduler::collect_ready(CollectInstructions& available)
{ {
sfn_log << SfnLog::schedule << "Ready instructions\n"; sfn_log << SfnLog::schedule << "Ready instructions\n";
bool result = false; bool result = false;
result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec);
result |= collect_ready_type(alu_trans_ready, available.alu_trans); result |= collect_ready_type(alu_trans_ready, available.alu_trans);
result |= collect_ready_type(alu_multi_slot_ready, available.alu_multi_slot); result |= collect_ready_type(alu_multi_slot_ready, available.alu_multi_slot);
result |= collect_ready_type(alu_groups_ready, available.alu_groups); result |= collect_ready_type(alu_groups_ready, available.alu_groups);
@ -1147,13 +1143,22 @@ BlockScheduler::collect_ready(CollectInstructions& available)
result |= collect_ready_type(free_ready, available.free_instr); result |= collect_ready_type(free_ready, available.free_instr);
result |= collect_ready_type(waitacks_ready, available.waitacks); result |= collect_ready_type(waitacks_ready, available.waitacks);
if (!result && available.predicate && available.alu_groups.empty() &&
available.gds_instr.empty() && available.tex.empty() &&
available.fetches.empty() && available.free_instr.empty())
result |=
collect_ready_alu_vec(alu_vec_ready, available.alu_vec, &available.predicate);
else
result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec, nullptr);
sfn_log << SfnLog::schedule << "\n"; sfn_log << SfnLog::schedule << "\n";
return result; return result;
} }
bool bool
BlockScheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready, BlockScheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready,
std::list<AluInstr *>& available) std::list<AluInstr *>& available,
AluInstr **predicate)
{ {
auto i = available.begin(); auto i = available.begin();
auto e = available.end(); auto e = available.end();
@ -1215,6 +1220,12 @@ BlockScheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready,
++i; ++i;
} }
if (predicate && *predicate && available.empty() && ready.size() < 16 &&
(*predicate)->ready()) {
ready.push_back(*predicate);
*predicate = nullptr;
}
for (auto& i : ready) for (auto& i : ready)
sfn_log << SfnLog::schedule << "V: " << *i << "\n"; sfn_log << SfnLog::schedule << "V: " << *i << "\n";

View file

@ -1860,7 +1860,7 @@ OUTPUT LOC:4 VARYING_SLOT:35 MASK:15
SYSVALUES R1.xyzw SYSVALUES R1.xyzw
ARRAYS A2[4].xy A2[4].zw ARRAYS A2[4].xy A2[4].zw
SHADER SHADER
BLOCK_START ALU BLOCK_START ALU_PUSH_BEFORE
ALU_GROUP_BEGIN ALU_GROUP_BEGIN
ALU MOV A2[0].x : I[1.0] {W} ALU MOV A2[0].x : I[1.0] {W}
ALU MOV A2[0].y : L[0x3f8ccccd] {W} ALU MOV A2[0].y : L[0x3f8ccccd] {W}
@ -1908,10 +1908,12 @@ ALU_GROUP_BEGIN
ALU MULADD_IEEE S17.w : KC0[3].w R1.z@fully S15.w {WL} ALU MULADD_IEEE S17.w : KC0[3].w R1.z@fully S15.w {WL}
ALU_GROUP_END ALU_GROUP_END
ALU_GROUP_BEGIN ALU_GROUP_BEGIN
ALU PRED_SETGE_INT __.x@chan : KC0[0].x L[0x4] {EP} PUSH_BEFORE
ALU MULADD_IEEE S19.z@group : KC0[4].z R1.w@fully S17.z {W} ALU MULADD_IEEE S19.z@group : KC0[4].z R1.w@fully S17.z {W}
ALU MULADD_IEEE S19.w@group : KC0[4].w R1.w@fully S17.w {WL} ALU MULADD_IEEE S19.w@group : KC0[4].w R1.w@fully S17.w {WL}
ALU_GROUP_END ALU_GROUP_END
IF (( ALU PRED_SETGE_INT __.x@free : KC0[0].x L[0x4] {LEP} PUSH_BEFORE )) IF (( ALU PRED_SETGE_INT __.x@chan : KC0[0].x L[0x4] {EP} PUSH_BEFORE ))
BLOCK_END
BLOCK_START ALU BLOCK_START ALU
ALU_GROUP_BEGIN ALU_GROUP_BEGIN
ALU ADD_INT S34.x : KC0[0].x L[0xfffffffc] {WL} ALU ADD_INT S34.x : KC0[0].x L[0xfffffffc] {WL}
@ -2467,7 +2469,7 @@ ALU_GROUP_BEGIN
ALU_GROUP_END ALU_GROUP_END
LOOP_BEGIN LOOP_BEGIN
BLOCK_END BLOCK_END
BLOCK_START ALU BLOCK_START ALU_PUSH_BEFORE
ALU_GROUP_BEGIN ALU_GROUP_BEGIN
ALU RECIPSQRT_IEEE S3.x@chan : |R1.x@free| {W} ALU RECIPSQRT_IEEE S3.x@chan : |R1.x@free| {W}
ALU RECIPSQRT_IEEE __.y@chgr : |R1.x@free| {} ALU RECIPSQRT_IEEE __.y@chgr : |R1.x@free| {}
@ -2476,12 +2478,13 @@ BLOCK_START ALU
ALU_GROUP_BEGIN ALU_GROUP_BEGIN
ALU SETGT_DX10 S4.x@chan : S3.x@chan S2.y@free {WL} ALU SETGT_DX10 S4.x@chan : S3.x@chan S2.y@free {WL}
ALU_GROUP_END ALU_GROUP_END
IF (( ALU PRED_SETNE_INT __.x@free : S4.x@chan I[0] {LEP} PUSH_BEFORE )) ALU_GROUP_BEGIN
BLOCK_END ALU PRED_SETNE_INT __.x@chan : S4.x@chan I[0] {LEP} PUSH_BEFORE
BLOCK_START ALU ALU_GROUP_END
IF (( ALU PRED_SETNE_INT __.x@chan : S4.x@chan I[0] {LEP} PUSH_BEFORE ))
BREAK BREAK
BLOCK_END BLOCK_END
BLOCK_START ALU BLOCK_START
ENDIF ENDIF
BLOCK_END BLOCK_END
BLOCK_START ALU BLOCK_START ALU