diff --git a/src/gallium/drivers/r600/sfn/sfn_instr.cpp b/src/gallium/drivers/r600/sfn/sfn_instr.cpp index ff431c71e60..788e8532b29 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr.cpp @@ -244,7 +244,10 @@ Block::do_print(std::ostream& os) const { for (int j = 0; j < 2 * m_nesting_depth; ++j) os << ' '; - os << "BLOCK START\n"; + os << "BLOCK START "; + if (m_cf_start) + os << *m_cf_start; + os << "\n"; for (auto& i : m_instructions) { for (int j = 0; j < 2 * (m_nesting_depth + i->nesting_corr()) + 2; ++j) os << ' '; @@ -298,16 +301,25 @@ Block::set_type(Type t, r600_chip_class chip_class) * to 16 slots if the register pressure doesn't get too high. */ m_remaining_slots = 8; + m_cf_start = + new ControlFlowInstr(chip_class >= ISA_CC_CAYMAN ? ControlFlowInstr::cf_tex + : ControlFlowInstr::cf_vtx); break; case gds: + m_cf_start = new ControlFlowInstr(ControlFlowInstr::cf_gds); + m_remaining_slots = chip_class >= ISA_CC_EVERGREEN ? 16 : 8; + break; case tex: + m_cf_start = new ControlFlowInstr(ControlFlowInstr::cf_tex); m_remaining_slots = chip_class >= ISA_CC_EVERGREEN ? 16 : 8; break; case alu: + m_cf_start = new ControlFlowInstr(ControlFlowInstr::cf_alu); /* 128 but a follow up block might need to emit and ADDR + INDEX load */ m_remaining_slots = 118; break; default: + m_cf_start = nullptr; m_remaining_slots = 0xffff; } } diff --git a/src/gallium/drivers/r600/sfn/sfn_instr.h b/src/gallium/drivers/r600/sfn/sfn_instr.h index 48c7091defb..476af6c5b78 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr.h +++ b/src/gallium/drivers/r600/sfn/sfn_instr.h @@ -179,6 +179,10 @@ public: const_iterator begin() const { return m_instructions.begin(); } const_iterator end() const { return m_instructions.end(); } + void set_cf_start(ControlFlowInstr *cf) { m_cf_start = cf; } + ControlFlowInstr *cf_start() { return m_cf_start; } + const ControlFlowInstr *cf_start() const { return m_cf_start; } + bool empty() const { return m_instructions.empty(); } void erase(iterator node); @@ -247,6 +251,7 @@ private: static unsigned s_max_kcache_banks; int m_emitted_rat_instr{0}; uint32_t m_expected_ar_uses{0}; + ControlFlowInstr *m_cf_start{nullptr}; }; class Resource { diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.cpp index 64f13486acc..1584f34c943 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.cpp @@ -97,6 +97,14 @@ ControlFlowInstr::do_print(std::ostream& os) const } } +void +ControlFlowInstr::promote_alu_cf(CFType new_type) +{ + assert(m_type == cf_alu); + assert(new_type == cf_alu_push_before); + m_type = new_type; +} + Instr::Pointer ControlFlowInstr::from_string(std::string type_str) { diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.h b/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.h index 8a90b95c84e..1c311b80db3 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.h +++ b/src/gallium/drivers/r600/sfn/sfn_instr_controlflow.h @@ -42,6 +42,7 @@ public: void accept(ConstInstrVisitor& visitor) const override; void accept(InstrVisitor& visitor) override; + void promote_alu_cf(CFType new_type); CFType cf_type() const { return m_type; } int nesting_corr() const override; diff --git a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp index 89d955b174e..9f85a09a668 100644 --- a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp @@ -705,7 +705,7 @@ BlockScheduler::schedule_alu(Shader::ShaderBlocks& out_blocks, ValueFactory& vf) if (group->has_kill_op()) { assert(!group->has_lds_group_start()); assert(expected_ar_uses == 0); - start_new_block(out_blocks, Block::alu); + start_new_block(out_blocks, Block::unknown); } group->update_readport_reserver(); return success; @@ -714,12 +714,12 @@ BlockScheduler::schedule_alu(Shader::ShaderBlocks& out_blocks, ValueFactory& vf) bool BlockScheduler::schedule_tex(Shader::ShaderBlocks& out_blocks) { - if (m_current_block->type() != Block::tex || m_current_block->remaining_slots() == 0) { + if (!tex_ready.empty() && (m_current_block->type() != Block::tex || + m_current_block->remaining_slots() == 0)) { start_new_block(out_blocks, Block::tex); - m_current_block->set_instr_flag(Instr::force_cf); } - if (!tex_ready.empty() && m_current_block->remaining_slots() > 0) { + if (m_current_block->remaining_slots() > 0) { auto ii = tex_ready.begin(); sfn_log << SfnLog::schedule << "Schedule: " << **ii << "\n"; @@ -742,7 +742,8 @@ BlockScheduler::schedule_tex(Shader::ShaderBlocks& out_blocks) bool BlockScheduler::schedule_vtx(Shader::ShaderBlocks& out_blocks) { - if (m_current_block->type() != Block::vtx || m_current_block->remaining_slots() == 0) { + if (!fetches_ready.empty() && (m_current_block->type() != Block::vtx || + m_current_block->remaining_slots() == 0)) { start_new_block(out_blocks, Block::vtx); m_current_block->set_instr_flag(Instr::force_cf); } diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.cpp b/src/gallium/drivers/r600/sfn/sfn_shader.cpp index 10287e8bc04..d30e409da92 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader.cpp @@ -186,11 +186,29 @@ Shader::emit_instruction_from_string(const std::string& s) { sfn_log << SfnLog::instr << "Create Instr from '" << s << "'\n"; - if (s == "BLOCK_START") { + if (s.compare(0, 11, "BLOCK_START") == 0) { + std::istringstream ins(s.substr(11)); + string type; + ins >> type; if (!m_current_block->empty()) { start_new_block(m_current_block->nesting_offset()); sfn_log << SfnLog::instr << " Emit start block\n"; } + + if (type == "ALU") + m_current_block->set_cf_start(new ControlFlowInstr(ControlFlowInstr::cf_alu)); + else if (type == "ALU_PUSH_BEFORE") + m_current_block->set_cf_start( + new ControlFlowInstr(ControlFlowInstr::cf_alu_push_before)); + else if (type == "GDS") + m_current_block->set_cf_start(new ControlFlowInstr(ControlFlowInstr::cf_gds)); + else if (type == "TEX") + m_current_block->set_cf_start(new ControlFlowInstr(ControlFlowInstr::cf_tex)); + else if (type == "VTX") + m_current_block->set_cf_start(new ControlFlowInstr(ControlFlowInstr::cf_vtx)); + else if (type == "POP") + m_current_block->set_cf_start(new ControlFlowInstr(ControlFlowInstr::cf_pop)); + return; } diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp index 323f7d7cea8..1718217a132 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_optimizer_test.cpp @@ -284,7 +284,7 @@ OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 ARRAYS A0[4].x SYSVALUES R0.xy SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOVA_INT AR : R0.x@fully {} ALU MOV A0[0].x : I[0] {WL} @@ -841,7 +841,7 @@ BLOCK_END )"; const char *expect = -R"( + R"( FS CHIPCLASS CAYMAN PROP MAX_COLOR_EXPORTS:1 @@ -850,7 +850,7 @@ PROP COLOR_EXPORT_MASK:15 PROP WRITE_ALL_COLORS:0 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MIN_UINT S3.w@free{s} : KC0[0].x L[0x2] {WL} ALU_GROUP_END @@ -858,7 +858,7 @@ ALU_GROUP_BEGIN ALU MOVA_INT IDX0 : S3.w@free{s} {L} ALU_GROUP_END BLOCK_END -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOV S4.x@chgr : KC1[IDX0][0].x {W} ALU MOV S4.y@chgr : KC1[IDX0][0].y {W} @@ -896,9 +896,8 @@ BLOCK_START EXPORT_DONE PIXEL 0 S2.xxxx BLOCK_END)"; - const char *expect = -R"(FS + R"(FS CHIPCLASS R600 FAMILY R600 PROP MAX_COLOR_EXPORTS:1 @@ -908,7 +907,7 @@ PROP WRITE_ALL_COLORS:0 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 ARRAYS A1[2].x SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOVA_INT AR : KC0[0].x {L} ALU_GROUP_END @@ -953,9 +952,8 @@ BLOCK_START EXPORT_DONE PIXEL 0 S2.xxxx BLOCK_END)"; - const char *expect = -R"(FS + R"(FS CHIPCLASS R600 FAMILY R600 PROP MAX_COLOR_EXPORTS:1 @@ -965,7 +963,7 @@ PROP WRITE_ALL_COLORS:0 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 ARRAYS A1[2].x SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOVA_INT AR : KC0[0].x {} ALU MOV A1[0].x : KC0[0].y {WL} @@ -1009,9 +1007,8 @@ BLOCK_START EXPORT_DONE PIXEL 0 S2.xxxx BLOCK_END)"; - const char *expect = -R"(FS + R"(FS CHIPCLASS R600 FAMILY RV670 PROP MAX_COLOR_EXPORTS:1 @@ -1021,7 +1018,7 @@ PROP WRITE_ALL_COLORS:0 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 ARRAYS A1[2].x SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOVA_INT AR : KC0[0].x {L} ALU_GROUP_END @@ -1063,9 +1060,8 @@ BLOCK_START EXPORT_DONE PIXEL 0 S2.xxxx BLOCK_END)"; - const char *expect = -R"(FS + R"(FS CHIPCLASS EVERGREEN FAMILY BARTS PROP MAX_COLOR_EXPORTS:1 @@ -1075,7 +1071,7 @@ PROP WRITE_ALL_COLORS:0 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 ARRAYS A1[2].x SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOVA_INT AR : KC0[0].x {L} ALU_GROUP_END @@ -1117,9 +1113,8 @@ BLOCK_START EXPORT_DONE PIXEL 0 S2.xxxx BLOCK_END)"; - const char *expect = -R"(FS + R"(FS CHIPCLASS R700 FAMILY RV770 PROP MAX_COLOR_EXPORTS:1 @@ -1129,7 +1124,7 @@ PROP WRITE_ALL_COLORS:0 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 ARRAYS A1[2].x SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOVA_INT AR : KC0[0].x {L} ALU_GROUP_END @@ -1191,7 +1186,7 @@ PROP WRITE_ALL_COLORS:0 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 ARRAYS A1[2].x SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOVA_INT AR : KC0[0].x {L} ALU_GROUP_END diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_split_address_loads_test.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_split_address_loads_test.cpp index cba5597f1e4..255147cc96c 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_split_address_loads_test.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_split_address_loads_test.cpp @@ -431,7 +431,7 @@ BLOCK_END )"; const char *expect = -R"( + R"( FS CHIPCLASS CAYMAN PROP MAX_COLOR_EXPORTS:1 @@ -440,7 +440,7 @@ PROP COLOR_EXPORT_MASK:15 PROP WRITE_ALL_COLORS:0 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MIN_UINT S3.w@free : KC0[0].x L[0x2] {WL} ALU_GROUP_END @@ -448,7 +448,7 @@ ALU_GROUP_BEGIN ALU MOVA_INT IDX0 : S3.w@free {L} ALU_GROUP_END BLOCK_END -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOV S4.x@chgr : KC1[IDX0][0].x {W} ALU MOV S4.y@chgr : KC1[IDX0][0].y {W} @@ -502,7 +502,7 @@ PROP COLOR_EXPORT_MASK:15 PROP WRITE_ALL_COLORS:0 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOVA_INT AR : KC0[0].x {} ALU MOV S0.y@free : KC0[1].y {W} @@ -512,10 +512,10 @@ ALU_GROUP_BEGIN ALU SET_CF_IDX0 IDX0 : AR {L} ALU_GROUP_END BLOCK_END -BLOCK_START +BLOCK_START TEX TEX SAMPLE S3.xyzw : S0.zy__ RID:0 SID:0 NNNN BLOCK_END -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU ADD S4.x@chgr : KC1[IDX0][0].x S3.x@chgr {W} ALU ADD S4.y@chgr : KC1[IDX0][0].y S3.y@chgr {W} @@ -559,7 +559,7 @@ BLOCK_END )"; const char *expect = -R"( + R"( FS CHIPCLASS CAYMAN PROP MAX_COLOR_EXPORTS:1 @@ -568,7 +568,7 @@ PROP COLOR_EXPORT_MASK:15 PROP WRITE_ALL_COLORS:0 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MIN_UINT S3.x@free : KC0[0].x L[0x2] {W} ALU MIN_UINT S3.y@free : KC0[0].y L[0x2] {W} @@ -582,7 +582,7 @@ ALU_GROUP_BEGIN ALU MOVA_INT IDX1 : S3.y@free {L} ALU_GROUP_END BLOCK_END -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOV S4.x@chgr : KC1[IDX0][0].x {WL} ALU_GROUP_END @@ -590,12 +590,12 @@ ALU_GROUP_BEGIN ALU MOVA_INT IDX0 : S3.z@free {L} ALU_GROUP_END BLOCK_END -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOV S4.z@chgr : KC1[IDX0][0].z {WL} ALU_GROUP_END BLOCK_END -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOV S4.y@chgr : KC1[IDX1][0].y {WL} ALU_GROUP_END @@ -603,7 +603,7 @@ ALU_GROUP_BEGIN ALU MOVA_INT IDX1 : S3.w@free {L} ALU_GROUP_END BLOCK_END -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOV S4.w@chgr : KC1[IDX1][0].w {WL} ALU_GROUP_END diff --git a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp index 1783e2c5e56..91bc6318cb7 100644 --- a/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp +++ b/src/gallium/drivers/r600/sfn/tests/sfn_test_shaders.cpp @@ -411,7 +411,7 @@ INPUT LOC:0 VARYING_SLOT:32 INTERP:1 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SYSVALUES R0.xy__ SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x VEC_210 {} ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y VEC_210 {} @@ -440,7 +440,7 @@ ALU MOV S1028.x@group : S1027.x {W} ALU MOV S1028.y@group : S1027.y {WL} ALU_GROUP_END BLOCK_END -BLOCK_START +BLOCK_START TEX TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN BLOCK_END BLOCK_START @@ -496,7 +496,7 @@ INPUT LOC:0 VARYING_SLOT:32 INTERP:1 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SYSVALUES R0.xy__ SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210 ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210 @@ -526,11 +526,10 @@ ALU_GROUP_END ALU_GROUP_BEGIN ALU MOV S1024.x : I[0] {WL} ALU_GROUP_END -BLOCK_START -BLOCK_END +BLOCK_START TEX TEX LD S1029.xyzw : S1028.xy_w RID:0 SID:18 NNNN -BLOCK_START BLOCK_END +BLOCK_START EXPORT_DONE PIXEL 0 S1029.xyzw BLOCK_END )"; @@ -546,7 +545,7 @@ INPUT LOC:0 VARYING_SLOT:32 INTERP:1 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SYSVALUES R0.xy__ SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU INTERP_ZW __.x@chan : R0.y@fully Param0.x {} VEC_210 ALU INTERP_ZW __.y@chan : R0.x@fully Param0.y {} VEC_210 @@ -565,7 +564,7 @@ ALU FLT_TO_INT S1026.y@group : S1025.y@chan {W} ALU FLT_TO_INT S1026.z@group : S1025.w@chan {WL} ALU_GROUP_END BLOCK_END -BLOCK_START +BLOCK_START TEX TEX LD S1029.xyzw : S1026.xy_z RID:0 SID:18 NNNN BLOCK_END BLOCK_START @@ -1409,7 +1408,7 @@ PROP COLOR_EXPORT_MASK:15 PROP WRITE_ALL_COLORS:1 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU ADD S4.x@chan : |KC0[0].x| -KC0[2].x {W} ALU ADD S4.y@chan : |KC0[0].y| -KC0[2].y {WL} @@ -1527,7 +1526,7 @@ PROP COLOR_EXPORT_MASK:15 PROP WRITE_ALL_COLORS:1 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU SETNE S12.x@chan : KC0[4].x KC0[0].x {W} ALU SETNE S13.y@chan : KC0[4].y KC0[0].y {WL} @@ -1601,7 +1600,7 @@ PROP COLOR_EXPORT_MASK:15 PROP WRITE_ALL_COLORS:1 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU SETNE_DX10 S5.x@chan : KC0[2].y KC0[0].y {W} ALU SETNE_DX10 S5.y@chan : KC0[2].x KC0[0].x {WL} @@ -1861,7 +1860,7 @@ OUTPUT LOC:4 VARYING_SLOT:35 MASK:15 SYSVALUES R1.xyzw ARRAYS A2[4].xy A2[4].zw SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOV A2[0].x : I[1.0] {W} ALU MOV A2[0].y : L[0x3f8ccccd] {W} @@ -1913,6 +1912,7 @@ ALU_GROUP_BEGIN ALU MULADD_IEEE S19.w@group : KC0[4].w R1.w@fully S17.w {WL} ALU_GROUP_END IF (( ALU PRED_SETGE_INT __.x@free : KC0[0].x L[0x4] {LEP} PUSH_BEFORE )) +BLOCK_START ALU ALU_GROUP_BEGIN ALU ADD_INT S34.x : KC0[0].x L[0xfffffffc] {WL} ALU_GROUP_END @@ -1924,6 +1924,8 @@ IF (( ALU PRED_SETGE_INT __.x@free : KC0[0].x L[0x4] {LEP} PUSH_BEFORE )) ALU MOV A2[AR].w : L[0x3dcccccd] {WL} ALU_GROUP_END ELSE +BLOCK_END +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOV S37.x : KC0[0].x {WL} ALU_GROUP_END @@ -1935,6 +1937,8 @@ ELSE ALU MOV A2[AR].y : L[0x3dcccccd] {WL} ALU_GROUP_END ENDIF +BLOCK_END +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOV S46.x@chgr : A2[0].x {W} ALU MOV S46.y@chgr : A2[0].y {W} @@ -2007,7 +2011,7 @@ PROP WRITE_ALL_COLORS:1 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 ARRAYS A0[2].xy SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOV A0[0].x : KC0[0].x {W} ALU MOV A0[0].y : KC0[0].y {W} @@ -2048,7 +2052,7 @@ PROP WRITE_ALL_COLORS:1 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 ARRAYS A0[2].xy SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOV A0[0].x : KC0[0].x {W} ALU MOV A0[0].y : KC0[0].y {W} @@ -2388,7 +2392,7 @@ INPUT LOC:0 VARYING_SLOT:32 INTERP:2 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 REGISTERS R0.x@fully R0.y@fully SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU INTERP_XY S1.x@chan : R0.y@fully Param0.x {W} VEC_210 ALU INTERP_XY S1.y@chan : R0.x@fully Param0.y {W} VEC_210 @@ -2408,11 +2412,11 @@ ALU_GROUP_BEGIN ALU MUL_IEEE S3.w@chgr : S1.y@chan S1.w@chan {WL} ALU_GROUP_END BLOCK_END -BLOCK_START +BLOCK_START TEX TEX SAMPLE S4.xyzw : S2.xy__ RID:18 SID:0 NNNN TEX SAMPLE S5.xyzw : S3.zw__ RID:18 SID:0 NNNN BLOCK_END -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU ADD S6.x@group : S5.x@chgr S4.x@chgr {W} ALU ADD S6.y@group : S5.y@chgr S4.y@chgr {W} @@ -2456,11 +2460,14 @@ PROP COLOR_EXPORT_MASK:15 PROP WRITE_ALL_COLORS:1 OUTPUT LOC:0 FRAG_RESULT:2 MASK:15 SHADER +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOV R1.x@free : I[0] {W} ALU MOV S2.y@chan : L[0x38f00000] {WL} ALU_GROUP_END LOOP_BEGIN +BLOCK_END +BLOCK_START ALU ALU_GROUP_BEGIN ALU RECIPSQRT_IEEE S3.x@chan : |R1.x@free| {W} ALU RECIPSQRT_IEEE __.y@chgr : |R1.x@free| {} @@ -2470,8 +2477,14 @@ LOOP_BEGIN ALU SETGT_DX10 S4.x@chan : S3.x@chan S2.y@free {WL} ALU_GROUP_END IF (( ALU PRED_SETNE_INT __.x@free : S4.x@chan I[0] {LEP} PUSH_BEFORE )) +BLOCK_END +BLOCK_START ALU BREAK +BLOCK_END +BLOCK_START ALU ENDIF +BLOCK_END +BLOCK_START ALU ALU_GROUP_BEGIN ALU ADD S5.x@free : S3.x@chan L[0x38f00000] {WL} ALU_GROUP_END @@ -2479,7 +2492,10 @@ LOOP_BEGIN ALU MUL R1.x@free : S5.x@free L[0x38f00000] {WL} ALU_GROUP_END LOOP_END +BLOCK_END +BLOCK_START EXPORT_DONE PIXEL 0 R1.xxxx +BLOCK_END )"; const char *gs_abs_float_nir = @@ -2693,17 +2709,17 @@ const char *vtx_for_tcs_sched = CHIPCLASS EVERGREEN REGISTERS R0.x@fully R0.y@fully SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MOV S3.x@chan : R0.x@fully {W} ALU MOV S7.y@chan : I[0] {WL} ALU_GROUP_END BLOCK_END -BLOCK_START +BLOCK_START VTX LOAD_BUF S4.xyzw : S3.x@chan RID:0 LOAD_BUF S8.xyzw : S7.y@chan RID:16 SRF BLOCK_END -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU MUL_UINT24 S10.x@chan : S8.y@chgr R0.y@fully {WL} ALU_GROUP_END @@ -2711,10 +2727,10 @@ ALU_GROUP_BEGIN ALU ADD_INT S12.x@chan : L[0x8] S10.x@chan {WL} ALU_GROUP_END BLOCK_END -BLOCK_START +BLOCK_START VTX LOAD_BUF S5.xyzw : S4.x@chgr + 96b RID:0 BLOCK_END -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU LDS WRITE_REL __.x : S10.x@chan S5.x@chgr S5.y@chgr {L} ALU_GROUP_END @@ -3042,7 +3058,7 @@ CHIPCLASS EVERGREEN OUTPUT LOC:0 VARYING_SLOT:0 MASK:15 REGISTERS R0.x@fully R0.y@fully R0.z@fully SHADER -BLOCK_START +BLOCK_START ALU ALU_GROUP_BEGIN ALU ADD S1026.x@chan : R0.x@fully R0.y@fully {W} ALU MOV S1033.y@chan : I[0] {WL} @@ -3059,11 +3075,11 @@ ALU_GROUP_END ALU_GROUP_BEGIN ALU FLT_TO_INT S1031.x@chan : S1030.x@chan {WL} ALU_GROUP_END -BLOCK_START BLOCK_END +BLOCK_START VTX LOAD_BUF S1034.xyzw : S1033.y@chan RID:16 SRF -BLOCK_START BLOCK_END +BLOCK_START ALU ALU_GROUP_BEGIN ALU MULADD_UINT24 S1036.x@chan : S1034.x@chgr R0.z@fully S1034.z@chgr {WL} ALU_GROUP_END @@ -3100,8 +3116,8 @@ ALU_GROUP_END ALU_GROUP_BEGIN ALU MOV S1040.w@group : I[LDS_OQ_A_POP] {WL} ALU_GROUP_END -BLOCK_START BLOCK_END +BLOCK_START EXPORT_DONE POS 0 S1040.xyzw EXPORT_DONE PARAM 0 R0.____ BLOCK_END)";