diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index a4507e71a8e..5bb6675cf36 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1248,7 +1248,6 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, unsigned type) { struct r600_bytecode_alu *nalu = r600_bytecode_alu(); - struct r600_bytecode_alu *lalu; int i, r; if (!nalu) @@ -1260,22 +1259,12 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc, assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs); } - if (bc->cf_last != NULL && bc->cf_last->op != type) { + if (bc->cf_last != NULL && bc->cf_last->op != type && !bc->force_add_cf) { /* check if we could add it anyway */ - if ((bc->cf_last->op == CF_OP_ALU && type == CF_OP_ALU_PUSH_BEFORE) || - (bc->cf_last->op == CF_OP_ALU_PUSH_BEFORE && type == CF_OP_ALU)) { - LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) { - if (lalu->execute_mask) { - assert(bc->force_add_cf || !"no force cf"); - bc->force_add_cf = 1; - break; - } - type = CF_OP_ALU_PUSH_BEFORE; - } - } else { - assert(bc->force_add_cf ||!"no force cf"); - bc->force_add_cf = 1; - } + if (bc->cf_last->op == CF_OP_ALU_PUSH_BEFORE && type == CF_OP_ALU) + type = CF_OP_ALU_PUSH_BEFORE; + else + assert(!"Try adding ALU with unsipported CF type to ALU_PUSH_BEFORE"); } /* cf can contains only alu or only vtx or only tex */ diff --git a/src/gallium/drivers/r600/sfn/sfn_assembler.cpp b/src/gallium/drivers/r600/sfn/sfn_assembler.cpp index 54a32089d5f..29edc3235de 100644 --- a/src/gallium/drivers/r600/sfn/sfn_assembler.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_assembler.cpp @@ -361,15 +361,14 @@ AssamblerVisitor::emit_alu_op(const AluInstr& ai) if (dst) sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n"; + /* auto cf_op = ai.cf_type(); unsigned type = 0; switch (cf_op) { case cf_alu: - type = CF_OP_ALU; - break; case cf_alu_push_before: - type = CF_OP_ALU_PUSH_BEFORE; + type = CF_OP_ALU; break; case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; @@ -392,11 +391,11 @@ AssamblerVisitor::emit_alu_op(const AluInstr& ai) default: assert(0 && "cf_alu_undefined should have been replaced"); } - +*/ if (alu.last) m_nliterals_in_group.clear(); - m_result = !r600_bytecode_add_alu_type(m_bc, &alu, type); + m_result = !r600_bytecode_add_alu(m_bc, &alu); if (unlikely(ai.opcode() == op1_mova_int)) { if (m_bc->gfx_level < CAYMAN || alu.dst.sel == 0) { @@ -851,7 +850,9 @@ AssamblerVisitor::visit(const Block& block) if (block.empty()) return; - if (block.has_instr_flag(Instr::force_cf)) { + if (block.cf_start()) + block.cf_start()->accept(*this); + else if (block.has_instr_flag(Instr::force_cf)) { m_bc->force_add_cf = 1; m_bc->ar_loaded = 0; m_last_addr = nullptr; @@ -874,15 +875,12 @@ AssamblerVisitor::visit(const Block& block) void AssamblerVisitor::visit(const IfInstr& instr) { - emit_alu_push_before(); auto pred = instr.predicate(); auto [addr, dummy0, dummy1] = pred->indirect_addr(); assert(!dummy1); assert(!addr); - pred->accept(*this); - r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP); clear_states(sf_all); @@ -892,6 +890,8 @@ AssamblerVisitor::visit(const IfInstr& instr) void AssamblerVisitor::visit(const ControlFlowInstr& instr) { + sfn_log << SfnLog::assembly << "Translate " << instr << " "; + clear_states(sf_all); switch (instr.cf_type()) { case ControlFlowInstr::cf_else: @@ -926,6 +926,21 @@ AssamblerVisitor::visit(const ControlFlowInstr& instr) m_result = false; } } break; + case ControlFlowInstr::cf_alu: + r600_bytecode_add_cfinst(m_bc, CF_OP_ALU); + break; + case ControlFlowInstr::cf_alu_push_before: + emit_alu_push_before(); + break; + case ControlFlowInstr::cf_gds: + r600_bytecode_add_cfinst(m_bc, CF_OP_GDS); + break; + case ControlFlowInstr::cf_tex: + r600_bytecode_add_cfinst(m_bc, CF_OP_TEX); + break; + case ControlFlowInstr::cf_vtx: + r600_bytecode_add_cfinst(m_bc, CF_OP_VTX); + break; default: UNREACHABLE("Unknown CF instruction type"); } diff --git a/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.cpp b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.cpp index d9a907af403..51d048317ac 100644 --- a/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_liverangeevaluator.cpp @@ -374,7 +374,6 @@ LiveRangeInstrVisitor::visit(IfInstr *instr) { int b = m_block; m_block = -1; - instr->predicate()->accept(*this); scope_if(); m_block = b; } diff --git a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp index e5e24a9ca13..b9106346927 100644 --- a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp @@ -72,6 +72,7 @@ public: { assert(!m_cf_instr); m_cf_instr = instr; + predicate = instr->predicate(); } void visit(EmitVertexInstr *instr) override @@ -121,6 +122,8 @@ public: std::list gds_instr; std::list waitacks; + AluInstr *predicate{nullptr}; + Instr *m_cf_instr{nullptr}; ValueFactory& m_value_factory; @@ -156,7 +159,8 @@ private: bool collect_ready_type(std::list& ready, std::list& orig); bool collect_ready_alu_vec(std::list& ready, - std::list& available); + std::list& available, + AluInstr **predicate); bool schedule_tex(Shader::ShaderBlocks& out_blocks); bool schedule_vtx(Shader::ShaderBlocks& out_blocks); @@ -500,10 +504,6 @@ BlockScheduler::schedule_block(Block& in_block, assert(!fail); if (cir.m_cf_instr) { - // Assert that if condition is ready - if (m_current_block->type() != Block::alu) { - start_new_block(out_blocks, Block::alu); - } m_current_block->push_back(cir.m_cf_instr); cir.m_cf_instr->set_scheduled(); } @@ -787,18 +787,13 @@ BlockScheduler::start_new_block(Shader::ShaderBlocks& out_blocks, Block::Type ty void BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks) { - // TODO: needs fixing - if (m_current_block->remaining_slots() > 0) { - out_blocks.push_back(m_current_block); - return; - } int used_slots = 0; int pending_slots = 0; Instr *next_block_start = nullptr; for (auto cur_group : *m_current_block) { - /* This limit is a bit fishy, it should be 128 */ + if (used_slots + pending_slots + cur_group->slots() < 128) { if (cur_group->can_start_alu_block()) { next_block_start = cur_group; @@ -843,6 +838,8 @@ void BlockScheduler::maybe_split_alu_block(Shader::ShaderBlocks& out_blocks) if (group->has_lds_group_end()) sub_block->lds_group_end(); + if (group->require_push()) + sub_block->cf_start()->promote_alu_cf(ControlFlowInstr::cf_alu_push_before); } if (!sub_block->empty()) out_blocks.push_back(sub_block); @@ -1137,7 +1134,6 @@ BlockScheduler::collect_ready(CollectInstructions& available) { sfn_log << SfnLog::schedule << "Ready instructions\n"; bool result = false; - result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec); result |= collect_ready_type(alu_trans_ready, available.alu_trans); result |= collect_ready_type(alu_multi_slot_ready, available.alu_multi_slot); result |= collect_ready_type(alu_groups_ready, available.alu_groups); @@ -1147,13 +1143,22 @@ BlockScheduler::collect_ready(CollectInstructions& available) result |= collect_ready_type(free_ready, available.free_instr); result |= collect_ready_type(waitacks_ready, available.waitacks); + if (!result && available.predicate && available.alu_groups.empty() && + available.gds_instr.empty() && available.tex.empty() && + available.fetches.empty() && available.free_instr.empty()) + result |= + collect_ready_alu_vec(alu_vec_ready, available.alu_vec, &available.predicate); + else + result |= collect_ready_alu_vec(alu_vec_ready, available.alu_vec, nullptr); + sfn_log << SfnLog::schedule << "\n"; return result; } bool BlockScheduler::collect_ready_alu_vec(std::list& ready, - std::list& available) + std::list& available, + AluInstr **predicate) { auto i = available.begin(); auto e = available.end(); @@ -1215,6 +1220,12 @@ BlockScheduler::collect_ready_alu_vec(std::list& ready, ++i; } + if (predicate && *predicate && available.empty() && ready.size() < 16 && + (*predicate)->ready()) { + ready.push_back(*predicate); + *predicate = nullptr; + } + for (auto& i : ready) sfn_log << SfnLog::schedule << "V: " << *i << "\n"; diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp index 91bc6318cb7..f6e360f5968 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp @@ -1860,7 +1860,7 @@ OUTPUT LOC:4 VARYING_SLOT:35 MASK:15 SYSVALUES R1.xyzw ARRAYS A2[4].xy A2[4].zw SHADER -BLOCK_START ALU +BLOCK_START ALU_PUSH_BEFORE ALU_GROUP_BEGIN ALU MOV A2[0].x : I[1.0] {W} ALU MOV A2[0].y : L[0x3f8ccccd] {W} @@ -1908,10 +1908,12 @@ ALU_GROUP_BEGIN ALU MULADD_IEEE S17.w : KC0[3].w R1.z@fully S15.w {WL} ALU_GROUP_END ALU_GROUP_BEGIN + ALU PRED_SETGE_INT __.x@chan : KC0[0].x L[0x4] {EP} PUSH_BEFORE ALU MULADD_IEEE S19.z@group : KC0[4].z R1.w@fully S17.z {W} ALU MULADD_IEEE S19.w@group : KC0[4].w R1.w@fully S17.w {WL} ALU_GROUP_END -IF (( ALU PRED_SETGE_INT __.x@free : KC0[0].x L[0x4] {LEP} PUSH_BEFORE )) +IF (( ALU PRED_SETGE_INT __.x@chan : KC0[0].x L[0x4] {EP} PUSH_BEFORE )) +BLOCK_END BLOCK_START ALU ALU_GROUP_BEGIN ALU ADD_INT S34.x : KC0[0].x L[0xfffffffc] {WL} @@ -2467,7 +2469,7 @@ ALU_GROUP_BEGIN ALU_GROUP_END LOOP_BEGIN BLOCK_END -BLOCK_START ALU +BLOCK_START ALU_PUSH_BEFORE ALU_GROUP_BEGIN ALU RECIPSQRT_IEEE S3.x@chan : |R1.x@free| {W} ALU RECIPSQRT_IEEE __.y@chgr : |R1.x@free| {} @@ -2476,12 +2478,13 @@ BLOCK_START ALU ALU_GROUP_BEGIN ALU SETGT_DX10 S4.x@chan : S3.x@chan S2.y@free {WL} ALU_GROUP_END - IF (( ALU PRED_SETNE_INT __.x@free : S4.x@chan I[0] {LEP} PUSH_BEFORE )) -BLOCK_END -BLOCK_START ALU + ALU_GROUP_BEGIN + ALU PRED_SETNE_INT __.x@chan : S4.x@chan I[0] {LEP} PUSH_BEFORE + ALU_GROUP_END + IF (( ALU PRED_SETNE_INT __.x@chan : S4.x@chan I[0] {LEP} PUSH_BEFORE )) BREAK BLOCK_END -BLOCK_START ALU +BLOCK_START ENDIF BLOCK_END BLOCK_START ALU