diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp index e54d3342a43..9f46587a892 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp @@ -1808,9 +1808,10 @@ static bool emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shad for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) { if (alu.dest.write_mask & (1 << i)){ + auto src = value_factory.src(alu.src[0], i); ir = new AluInstr(op2_and_int, value_factory.dest(alu.dest, i, pin), - value_factory.src(alu.src[0], i), + src, value_factory.inline_const(mask, 0), {alu_write}); if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg); @@ -2396,17 +2397,20 @@ static bool emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, S auto pin = pin_for_components(alu); + unsigned ncomp = nir_dest_num_components(alu.dest.dest) == 4 ? 4 : 3; + /* todo: Actually we need only three channels, but then we have * to make sure that we don't hava w dest */ - for (unsigned j = 0; j < 4; ++j) { + for (unsigned j = 0; j < ncomp; ++j) { if (alu.dest.write_mask & (1 << j)) { - AluInstr::SrcValues srcs(4); - PRegister dest = value_factory.dest(alu.dest.dest, j, pin); + AluInstr::SrcValues srcs(ncomp); + PRegister dest = value_factory.dest(alu.dest.dest, j, pin, + (1 << ncomp) - 1); - for (unsigned i = 0; i < 4; ++i) + for (unsigned i = 0; i < ncomp; ++i) srcs[i] = value_factory.src(src0, j); - auto ir = new AluInstr(opcode, dest, srcs, AluInstr::last_write, 4); + auto ir = new AluInstr(opcode, dest, srcs, AluInstr::last_write, ncomp); if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs); if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg); diff --git a/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp b/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp index 74fca92b7a2..1b2aae5f38f 100644 --- a/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp @@ -162,10 +162,10 @@ void ValueFactory::inject_value(const nir_dest& dest, int chan, PVirtualValue va } PRegister ValueFactory::dest(const nir_alu_dest& dst, int chan, - Pin pin_channel) + Pin pin_channel, uint8_t chan_mask) { sfn_log << SfnLog::reg << "Search (ref) " << &dst << "\n"; - return dest(dst.dest, chan, pin_channel); + return dest(dst.dest, chan, pin_channel, chan_mask); } class TranslateRegister: public RegisterVisitor { @@ -214,10 +214,11 @@ PRegister ValueFactory::resolve_array(nir_register *reg, nir_src *indirect, } } -PRegister ValueFactory::dest(const nir_dest& dst, int chan, Pin pin_channel) +PRegister ValueFactory::dest(const nir_dest& dst, int chan, Pin pin_channel, + uint8_t chan_mask) { if (dst.is_ssa) { - return dest(dst.ssa, chan, pin_channel); + return dest(dst.ssa, chan, pin_channel, chan_mask); } else { return resolve_array(dst.reg.reg, dst.reg.indirect, dst.reg.base_offset, chan); @@ -253,7 +254,7 @@ PRegister ValueFactory::temp_register(int pinned_channel, bool is_ssa) { int sel = m_next_register_index++; int chan = (pinned_channel >= 0) ? - pinned_channel : m_channel_counts.least_used(); + pinned_channel : m_channel_counts.least_used(0xf); auto reg = new Register( sel, chan, pinned_channel >= 0 ? pin_chan : pin_free); @@ -348,7 +349,8 @@ PRegister ValueFactory::dummy_dest(unsigned chan) } PRegister -ValueFactory::dest(const nir_ssa_def& ssa, int chan, Pin pin_channel) +ValueFactory::dest(const nir_ssa_def& ssa, int chan, Pin pin_channel, + uint8_t chan_mask) { RegisterKey key(ssa.index, chan, vp_ssa); @@ -364,11 +366,13 @@ ValueFactory::dest(const nir_ssa_def& ssa, int chan, Pin pin_channel) sel = isel->second; else { sel = m_next_register_index++; + sfn_log << SfnLog::reg << "Assign " << sel << " to index " + << ssa.index << " in " << &m_ssa_index_to_sel << "\n"; m_ssa_index_to_sel[ssa.index] = sel; } if (pin_channel == pin_free) - chan = m_channel_counts.least_used(); + chan = m_channel_counts.least_used(chan_mask); auto vreg = new Register( sel, chan, pin_channel); m_channel_counts.inc_count(chan); diff --git a/src/gallium/drivers/r600/sfn/sfn_valuefactory.h b/src/gallium/drivers/r600/sfn/sfn_valuefactory.h index 74ee1a5496e..51fd66e080d 100644 --- a/src/gallium/drivers/r600/sfn/sfn_valuefactory.h +++ b/src/gallium/drivers/r600/sfn/sfn_valuefactory.h @@ -167,10 +167,12 @@ struct register_key_hash { class ChannelCounts { public: void inc_count(int chan) {++m_counts[chan];} - int least_used() const { + int least_used(uint8_t mask) const { int least_used = 0; uint32_t count = m_counts[0]; for (int i = 1; i < 4; ++i) { + if (!((1 << i) & mask)) + continue; if (count > m_counts[i]) { count = m_counts[i]; least_used = i; @@ -214,9 +216,9 @@ public: const std::vector& components); - PRegister dest(const nir_alu_dest& dest, int chan, Pin pin_channel); - PRegister dest(const nir_dest& dest, int chan, Pin pin_channel); - PRegister dest(const nir_ssa_def& dest, int chan, Pin pin_channel); + PRegister dest(const nir_alu_dest& dest, int chan, Pin pin_channel, uint8_t chan_mask = 0xf); + PRegister dest(const nir_dest& dest, int chan, Pin pin_channel, uint8_t chan_mask = 0xf); + PRegister dest(const nir_ssa_def& dest, int chan, Pin pin_channel, uint8_t chan_mask = 0xf); PRegister dummy_dest(unsigned chan); PRegister temp_register(int pinned_channel = -1, bool is_ssa = true);