mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 18:10:11 +01:00
r600/sfn: handle the IF predicate in the scheduler
This also involves emitting the CF for the blocks in the sfn assembler, and we have to remove the special handling in the old backend assembler.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37096>
This commit is contained in:
parent
359bfc3138
commit
713edb5998
5 changed files with 63 additions and 46 deletions
|
|
@ -1248,7 +1248,6 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
|
||||||
const struct r600_bytecode_alu *alu, unsigned type)
|
const struct r600_bytecode_alu *alu, unsigned type)
|
||||||
{
|
{
|
||||||
struct r600_bytecode_alu *nalu = r600_bytecode_alu();
|
struct r600_bytecode_alu *nalu = r600_bytecode_alu();
|
||||||
struct r600_bytecode_alu *lalu;
|
|
||||||
int i, r;
|
int i, r;
|
||||||
|
|
||||||
if (!nalu)
|
if (!nalu)
|
||||||
|
|
@ -1260,22 +1259,12 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
|
||||||
assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
|
assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bc->cf_last != NULL && bc->cf_last->op != type) {
|
if (bc->cf_last != NULL && bc->cf_last->op != type && !bc->force_add_cf) {
|
||||||
/* check if we could add it anyway */
|
/* check if we could add it anyway */
|
||||||
if ((bc->cf_last->op == CF_OP_ALU && type == CF_OP_ALU_PUSH_BEFORE) ||
|
if (bc->cf_last->op == CF_OP_ALU_PUSH_BEFORE && type == CF_OP_ALU)
|
||||||
(bc->cf_last->op == CF_OP_ALU_PUSH_BEFORE && type == CF_OP_ALU)) {
|
|
||||||
LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) {
|
|
||||||
if (lalu->execute_mask) {
|
|
||||||
assert(bc->force_add_cf || !"no force cf");
|
|
||||||
bc->force_add_cf = 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
type = CF_OP_ALU_PUSH_BEFORE;
|
type = CF_OP_ALU_PUSH_BEFORE;
|
||||||
}
|
else
|
||||||
} else {
|
assert(!"Try adding ALU with unsipported CF type to ALU_PUSH_BEFORE");
|
||||||
assert(bc->force_add_cf ||!"no force cf");
|
|
||||||
bc->force_add_cf = 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* cf can contains only alu or only vtx or only tex */
|
/* cf can contains only alu or only vtx or only tex */
|
||||||
|
|
|
||||||
|
|
@ -361,15 +361,14 @@ AssamblerVisitor::emit_alu_op(const AluInstr& ai)
|
||||||
if (dst)
|
if (dst)
|
||||||
sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n";
|
sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n";
|
||||||
|
|
||||||
|
/*
|
||||||
auto cf_op = ai.cf_type();
|
auto cf_op = ai.cf_type();
|
||||||
|
|
||||||
unsigned type = 0;
|
unsigned type = 0;
|
||||||
switch (cf_op) {
|
switch (cf_op) {
|
||||||
case cf_alu:
|
case cf_alu:
|
||||||
type = CF_OP_ALU;
|
|
||||||
break;
|
|
||||||
case cf_alu_push_before:
|
case cf_alu_push_before:
|
||||||
type = CF_OP_ALU_PUSH_BEFORE;
|
type = CF_OP_ALU;
|
||||||
break;
|
break;
|
||||||
case cf_alu_pop_after:
|
case cf_alu_pop_after:
|
||||||
type = CF_OP_ALU_POP_AFTER;
|
type = CF_OP_ALU_POP_AFTER;
|
||||||
|
|
@ -392,11 +391,11 @@ AssamblerVisitor::emit_alu_op(const AluInstr& ai)
|
||||||
default:
|
default:
|
||||||
assert(0 && "cf_alu_undefined should have been replaced");
|
assert(0 && "cf_alu_undefined should have been replaced");
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
if (alu.last)
|
if (alu.last)
|
||||||
m_nliterals_in_group.clear();
|
m_nliterals_in_group.clear();
|
||||||
|
|
||||||
m_result = !r600_bytecode_add_alu_type(m_bc, &alu, type);
|
m_result = !r600_bytecode_add_alu(m_bc, &alu);
|
||||||
|
|
||||||
if (unlikely(ai.opcode() == op1_mova_int)) {
|
if (unlikely(ai.opcode() == op1_mova_int)) {
|
||||||
if (m_bc->gfx_level < CAYMAN || alu.dst.sel == 0) {
|
if (m_bc->gfx_level < CAYMAN || alu.dst.sel == 0) {
|
||||||
|
|
@ -851,7 +850,9 @@ AssamblerVisitor::visit(const Block& block)
|
||||||
if (block.empty())
|
if (block.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (block.has_instr_flag(Instr::force_cf)) {
|
if (block.cf_start())
|
||||||
|
block.cf_start()->accept(*this);
|
||||||
|
else if (block.has_instr_flag(Instr::force_cf)) {
|
||||||
m_bc->force_add_cf = 1;
|
m_bc->force_add_cf = 1;
|
||||||
m_bc->ar_loaded = 0;
|
m_bc->ar_loaded = 0;
|
||||||
m_last_addr = nullptr;
|
m_last_addr = nullptr;
|
||||||
|
|
@ -874,15 +875,12 @@ AssamblerVisitor::visit(const Block& block)
|
||||||
void
|
void
|
||||||
AssamblerVisitor::visit(const IfInstr& instr)
|
AssamblerVisitor::visit(const IfInstr& instr)
|
||||||
{
|
{
|
||||||
emit_alu_push_before();
|
|
||||||
|
|
||||||
auto pred = instr.predicate();
|
auto pred = instr.predicate();
|
||||||
auto [addr, dummy0, dummy1] = pred->indirect_addr();
|
auto [addr, dummy0, dummy1] = pred->indirect_addr();
|
||||||
assert(!dummy1);
|
assert(!dummy1);
|
||||||
assert(!addr);
|
assert(!addr);
|
||||||
|
|
||||||
pred->accept(*this);
|
|
||||||
|
|
||||||
r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);
|
r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);
|
||||||
clear_states(sf_all);
|
clear_states(sf_all);
|
||||||
|
|
||||||
|
|
@ -892,6 +890,8 @@ AssamblerVisitor::visit(const IfInstr& instr)
|
||||||
void
|
void
|
||||||
AssamblerVisitor::visit(const ControlFlowInstr& instr)
|
AssamblerVisitor::visit(const ControlFlowInstr& instr)
|
||||||
{
|
{
|
||||||
|
sfn_log << SfnLog::assembly << "Translate " << instr << " ";
|
||||||
|
|
||||||
clear_states(sf_all);
|
clear_states(sf_all);
|
||||||
switch (instr.cf_type()) {
|
switch (instr.cf_type()) {
|
||||||
case ControlFlowInstr::cf_else:
|
case ControlFlowInstr::cf_else:
|
||||||
|
|
@ -926,6 +926,21 @@ AssamblerVisitor::visit(const ControlFlowInstr& instr)
|
||||||
m_result = false;
|
m_result = false;
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
|
case ControlFlowInstr::cf_alu:
|
||||||
|
r600_bytecode_add_cfinst(m_bc, CF_OP_ALU);
|
||||||
|
break;
|
||||||
|
case ControlFlowInstr::cf_alu_push_before:
|
||||||
|
emit_alu_push_before();
|
||||||
|
break;
|
||||||
|
case ControlFlowInstr::cf_gds:
|
||||||
|
r600_bytecode_add_cfinst(m_bc, CF_OP_GDS);
|
||||||
|
break;
|
||||||
|
case ControlFlowInstr::cf_tex:
|
||||||
|
r600_bytecode_add_cfinst(m_bc, CF_OP_TEX);
|
||||||
|
break;
|
||||||
|
case ControlFlowInstr::cf_vtx:
|
||||||
|
r600_bytecode_add_cfinst(m_bc, CF_OP_VTX);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
UNREACHABLE("Unknown CF instruction type");
|
UNREACHABLE("Unknown CF instruction type");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -374,7 +374,6 @@ LiveRangeInstrVisitor::visit(IfInstr *instr)
|
||||||
{
|
{
|
||||||
int b = m_block;
|
int b = m_block;
|
||||||
m_block = -1;
|
m_block = -1;
|
||||||
instr->predicate()->accept(*this);
|
|
||||||
scope_if();
|
scope_if();
|
||||||
m_block = b;
|
m_block = b;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -72,6 +72,7 @@ public:
|
||||||
{
|
{
|
||||||
assert(!m_cf_instr);
|
assert(!m_cf_instr);
|
||||||
m_cf_instr = instr;
|
m_cf_instr = instr;
|
||||||
|
predicate = instr->predicate();
|
||||||
}
|
}
|
||||||
|
|
||||||
void visit(EmitVertexInstr *instr) override
|
void visit(EmitVertexInstr *instr) override
|
||||||
|
|
@ -121,6 +122,8 @@ public:
|
||||||
std::list<Instr *> gds_instr;
|
std::list<Instr *> gds_instr;
|
||||||
std::list<Instr *> waitacks;
|
std::list<Instr *> waitacks;
|
||||||
|
|
||||||
|
AluInstr *predicate{nullptr};
|
||||||
|
|
||||||
Instr *m_cf_instr{nullptr};
|
Instr *m_cf_instr{nullptr};
|
||||||
ValueFactory& m_value_factory;
|
ValueFactory& m_value_factory;
|
||||||
|
|
||||||
|
|
@ -156,7 +159,8 @@ private:
|
||||||
bool collect_ready_type(std::list<T *>& ready, std::list<T *>& orig);
|
bool collect_ready_type(std::list<T *>& ready, std::list<T *>& orig);
|
||||||
|
|
||||||
bool collect_ready_alu_vec(std::list<AluInstr *>& ready,
|
bool collect_ready_alu_vec(std::list<AluInstr *>& ready,
|
||||||
std::list<AluInstr *>& available);
|
std::list<AluInstr *>& available,
|
||||||
|
AluInstr **predicate);
|
||||||
|
|
||||||
bool schedule_tex(Shader::ShaderBlocks& out_blocks);
|
bool schedule_tex(Shader::ShaderBlocks& out_blocks);
|
||||||
bool schedule_vtx(Shader::ShaderBlocks& out_blocks);
|
bool schedule_vtx(Shader::ShaderBlocks& out_blocks);
|
||||||
|
|
@ -500,10 +504,6 @@ BlockScheduler::schedule_block(Block& in_block,
|
||||||
assert(!fail);
|
assert(!fail);
|
||||||
|
|
||||||
if (cir.m_cf_instr) {
|
if (cir.m_cf_instr) {
|
||||||
// Assert that if condition is ready
|
|
||||||
if (m_current_block->type() != Block::alu) {
|
|
||||||
start_new_block(out_blocks, Block::alu);
|
|
||||||
}
|
|
||||||
m_current_block->push_back(cir.m_cf_instr);
|
m_current_block->push_back(cir.m_cf_instr);
|
||||||
cir.m_cf_instr->set_scheduled();
|
cir.m_cf_instr->set_scheduled();
|
||||||
}
|
}
|
||||||
|
|
@ -787,18 +787,13 @@ BlockScheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type ty
|
||||||
|
|
||||||
void BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks)
|
void BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks)
|
||||||
{
|
{
|
||||||
// TODO: needs fixing
|
|
||||||
if (m_current_block->remaining_slots() > 0) {
|
|
||||||
out_blocks.push_back(m_current_block);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
int used_slots = 0;
|
int used_slots = 0;
|
||||||
int pending_slots = 0;
|
int pending_slots = 0;
|
||||||
|
|
||||||
Instr *next_block_start = nullptr;
|
Instr *next_block_start = nullptr;
|
||||||
for (auto cur_group : *m_current_block) {
|
for (auto cur_group : *m_current_block) {
|
||||||
/* This limit is a bit fishy, it should be 128 */
|
|
||||||
if (used_slots + pending_slots + cur_group->slots() < 128) {
|
if (used_slots + pending_slots + cur_group->slots() < 128) {
|
||||||
if (cur_group->can_start_alu_block()) {
|
if (cur_group->can_start_alu_block()) {
|
||||||
next_block_start = cur_group;
|
next_block_start = cur_group;
|
||||||
|
|
@ -843,6 +838,8 @@ void BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks)
|
||||||
if (group->has_lds_group_end())
|
if (group->has_lds_group_end())
|
||||||
sub_block->lds_group_end();
|
sub_block->lds_group_end();
|
||||||
|
|
||||||
|
if (group->require_push())
|
||||||
|
sub_block->cf_start()->promote_alu_cf(ControlFlowInstr::cf_alu_push_before);
|
||||||
}
|
}
|
||||||
if (!sub_block->empty())
|
if (!sub_block->empty())
|
||||||
out_blocks.push_back(sub_block);
|
out_blocks.push_back(sub_block);
|
||||||
|
|
@ -1137,7 +1134,6 @@ BlockScheduler::collect_ready(CollectInstructions& available)
|
||||||
{
|
{
|
||||||
sfn_log << SfnLog::schedule << "Ready instructions\n";
|
sfn_log << SfnLog::schedule << "Ready instructions\n";
|
||||||
bool result = false;
|
bool result = false;
|
||||||
result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec);
|
|
||||||
result |= collect_ready_type(alu_trans_ready, available.alu_trans);
|
result |= collect_ready_type(alu_trans_ready, available.alu_trans);
|
||||||
result |= collect_ready_type(alu_multi_slot_ready, available.alu_multi_slot);
|
result |= collect_ready_type(alu_multi_slot_ready, available.alu_multi_slot);
|
||||||
result |= collect_ready_type(alu_groups_ready, available.alu_groups);
|
result |= collect_ready_type(alu_groups_ready, available.alu_groups);
|
||||||
|
|
@ -1147,13 +1143,22 @@ BlockScheduler::collect_ready(CollectInstructions& available)
|
||||||
result |= collect_ready_type(free_ready, available.free_instr);
|
result |= collect_ready_type(free_ready, available.free_instr);
|
||||||
result |= collect_ready_type(waitacks_ready, available.waitacks);
|
result |= collect_ready_type(waitacks_ready, available.waitacks);
|
||||||
|
|
||||||
|
if (!result && available.predicate && available.alu_groups.empty() &&
|
||||||
|
available.gds_instr.empty() && available.tex.empty() &&
|
||||||
|
available.fetches.empty() && available.free_instr.empty())
|
||||||
|
result |=
|
||||||
|
collect_ready_alu_vec(alu_vec_ready, available.alu_vec, &available.predicate);
|
||||||
|
else
|
||||||
|
result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec, nullptr);
|
||||||
|
|
||||||
sfn_log << SfnLog::schedule << "\n";
|
sfn_log << SfnLog::schedule << "\n";
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
BlockScheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready,
|
BlockScheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready,
|
||||||
std::list<AluInstr *>& available)
|
std::list<AluInstr *>& available,
|
||||||
|
AluInstr **predicate)
|
||||||
{
|
{
|
||||||
auto i = available.begin();
|
auto i = available.begin();
|
||||||
auto e = available.end();
|
auto e = available.end();
|
||||||
|
|
@ -1215,6 +1220,12 @@ BlockScheduler::collect_ready_alu_vec(std::list<AluInstr *>& ready,
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (predicate && *predicate && available.empty() && ready.size() < 16 &&
|
||||||
|
(*predicate)->ready()) {
|
||||||
|
ready.push_back(*predicate);
|
||||||
|
*predicate = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
for (auto& i : ready)
|
for (auto& i : ready)
|
||||||
sfn_log << SfnLog::schedule << "V: " << *i << "\n";
|
sfn_log << SfnLog::schedule << "V: " << *i << "\n";
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1860,7 +1860,7 @@ OUTPUT LOC:4 VARYING_SLOT:35 MASK:15
|
||||||
SYSVALUES R1.xyzw
|
SYSVALUES R1.xyzw
|
||||||
ARRAYS A2[4].xy A2[4].zw
|
ARRAYS A2[4].xy A2[4].zw
|
||||||
SHADER
|
SHADER
|
||||||
BLOCK_START ALU
|
BLOCK_START ALU_PUSH_BEFORE
|
||||||
ALU_GROUP_BEGIN
|
ALU_GROUP_BEGIN
|
||||||
ALU MOV A2[0].x : I[1.0] {W}
|
ALU MOV A2[0].x : I[1.0] {W}
|
||||||
ALU MOV A2[0].y : L[0x3f8ccccd] {W}
|
ALU MOV A2[0].y : L[0x3f8ccccd] {W}
|
||||||
|
|
@ -1908,10 +1908,12 @@ ALU_GROUP_BEGIN
|
||||||
ALU MULADD_IEEE S17.w : KC0[3].w R1.z@fully S15.w {WL}
|
ALU MULADD_IEEE S17.w : KC0[3].w R1.z@fully S15.w {WL}
|
||||||
ALU_GROUP_END
|
ALU_GROUP_END
|
||||||
ALU_GROUP_BEGIN
|
ALU_GROUP_BEGIN
|
||||||
|
ALU PRED_SETGE_INT __.x@chan : KC0[0].x L[0x4] {EP} PUSH_BEFORE
|
||||||
ALU MULADD_IEEE S19.z@group : KC0[4].z R1.w@fully S17.z {W}
|
ALU MULADD_IEEE S19.z@group : KC0[4].z R1.w@fully S17.z {W}
|
||||||
ALU MULADD_IEEE S19.w@group : KC0[4].w R1.w@fully S17.w {WL}
|
ALU MULADD_IEEE S19.w@group : KC0[4].w R1.w@fully S17.w {WL}
|
||||||
ALU_GROUP_END
|
ALU_GROUP_END
|
||||||
IF (( ALU PRED_SETGE_INT __.x@free : KC0[0].x L[0x4] {LEP} PUSH_BEFORE ))
|
IF (( ALU PRED_SETGE_INT __.x@chan : KC0[0].x L[0x4] {EP} PUSH_BEFORE ))
|
||||||
|
BLOCK_END
|
||||||
BLOCK_START ALU
|
BLOCK_START ALU
|
||||||
ALU_GROUP_BEGIN
|
ALU_GROUP_BEGIN
|
||||||
ALU ADD_INT S34.x : KC0[0].x L[0xfffffffc] {WL}
|
ALU ADD_INT S34.x : KC0[0].x L[0xfffffffc] {WL}
|
||||||
|
|
@ -2467,7 +2469,7 @@ ALU_GROUP_BEGIN
|
||||||
ALU_GROUP_END
|
ALU_GROUP_END
|
||||||
LOOP_BEGIN
|
LOOP_BEGIN
|
||||||
BLOCK_END
|
BLOCK_END
|
||||||
BLOCK_START ALU
|
BLOCK_START ALU_PUSH_BEFORE
|
||||||
ALU_GROUP_BEGIN
|
ALU_GROUP_BEGIN
|
||||||
ALU RECIPSQRT_IEEE S3.x@chan : |R1.x@free| {W}
|
ALU RECIPSQRT_IEEE S3.x@chan : |R1.x@free| {W}
|
||||||
ALU RECIPSQRT_IEEE __.y@chgr : |R1.x@free| {}
|
ALU RECIPSQRT_IEEE __.y@chgr : |R1.x@free| {}
|
||||||
|
|
@ -2476,12 +2478,13 @@ BLOCK_START ALU
|
||||||
ALU_GROUP_BEGIN
|
ALU_GROUP_BEGIN
|
||||||
ALU SETGT_DX10 S4.x@chan : S3.x@chan S2.y@free {WL}
|
ALU SETGT_DX10 S4.x@chan : S3.x@chan S2.y@free {WL}
|
||||||
ALU_GROUP_END
|
ALU_GROUP_END
|
||||||
IF (( ALU PRED_SETNE_INT __.x@free : S4.x@chan I[0] {LEP} PUSH_BEFORE ))
|
ALU_GROUP_BEGIN
|
||||||
BLOCK_END
|
ALU PRED_SETNE_INT __.x@chan : S4.x@chan I[0] {LEP} PUSH_BEFORE
|
||||||
BLOCK_START ALU
|
ALU_GROUP_END
|
||||||
|
IF (( ALU PRED_SETNE_INT __.x@chan : S4.x@chan I[0] {LEP} PUSH_BEFORE ))
|
||||||
BREAK
|
BREAK
|
||||||
BLOCK_END
|
BLOCK_END
|
||||||
BLOCK_START ALU
|
BLOCK_START
|
||||||
ENDIF
|
ENDIF
|
||||||
BLOCK_END
|
BLOCK_END
|
||||||
BLOCK_START ALU
|
BLOCK_START ALU
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue