r600/sfn: allow per-source modifiers with multi-slot ops

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23702>
This commit is contained in:
Gert Wollny 2023-06-16 17:50:55 +02:00 committed by Marge Bot
parent 0ff3c4bef2
commit 2f6ded1098
8 changed files with 159 additions and 76 deletions

View file

@ -242,13 +242,8 @@ enum EAluOp {
};
enum AluModifiers {
alu_src0_neg,
alu_src0_abs,
alu_src0_rel,
alu_src1_neg,
alu_src1_abs,
alu_src1_rel,
alu_src2_neg,
alu_src2_rel,
alu_dst_clamp,
alu_dst_rel,
@ -265,6 +260,7 @@ enum AluModifiers {
alu_lds_address,
alu_no_schedule_bias,
alu_64bit_op,
alu_flag_none,
alu_flag_count
};

View file

@ -338,9 +338,9 @@ AssamblerVisitor::emit_alu_op(const AluInstr& ai)
for (unsigned i = 0; i < ai.n_sources(); ++i) {
buffer_offset = copy_src(alu.src[i], ai.src(i));
alu.src[i].neg = ai.has_alu_flag(AluInstr::src_neg_flags[i]);
alu.src[i].neg = ai.has_source_mod(i, AluInstr::mod_neg);
if (!alu.is_op3)
alu.src[i].abs = ai.has_alu_flag(AluInstr::src_abs_flags[i]);
alu.src[i].abs = ai.has_source_mod(i, AluInstr::mod_abs);
if (buffer_offset && kcache_index_mode == bim_none) {
auto idx_reg = buffer_offset->as_register();

View file

@ -217,9 +217,6 @@ const std::map<AluBankSwizzle, std::string> AluInstr::bank_swizzle_map = {
{alu_vec_210, "VEC_210"}
};
const AluModifiers AluInstr::src_abs_flags[2] = {alu_src0_abs, alu_src1_abs};
const AluModifiers AluInstr::src_neg_flags[3] = {
alu_src0_neg, alu_src1_neg, alu_src2_neg};
const AluModifiers AluInstr::src_rel_flags[3] = {
alu_src0_rel, alu_src1_rel, alu_src2_rel};
@ -275,6 +272,7 @@ AluInstr::do_print(std::ostream& os) const
const int n_source_per_slot =
has_alu_flag(alu_is_lds) ? m_src.size() : alu_ops.at(m_opcode).nsrc;
for (int s = 0; s < m_alu_slots; ++s) {
if (s > 0)
@ -284,12 +282,12 @@ AluInstr::do_print(std::ostream& os) const
int pflags = 0;
if (i)
os << ' ';
if (has_alu_flag(src_neg_flags[k]))
if (has_source_mod(i, mod_neg))
pflags |= ValuePrintFlags::has_neg;
if (has_alu_flag(src_rel_flags[k]))
pflags |= ValuePrintFlags::is_rel;
if (i < 2)
if (has_alu_flag(src_abs_flags[k]))
if (n_source_per_slot <= 2)
if (has_source_mod(i, mod_abs))
pflags |= ValuePrintFlags::has_abs;
if (pflags & ValuePrintFlags::has_neg)
@ -434,7 +432,7 @@ AluInstr::can_copy_propagate() const
if (m_opcode != op1_mov)
return false;
if (has_alu_flag(alu_src0_abs) || has_alu_flag(alu_src0_neg) ||
if (has_source_mod(0, mod_abs) || has_source_mod(0, mod_neg) ||
has_alu_flag(alu_dst_clamp))
return false;
@ -834,16 +832,16 @@ AluInstr::split(ValueFactory& vf)
instr->set_blockid(block_id(), index());
if (s == 0 || !m_alu_flags.test(alu_64bit_op)) {
if (has_alu_flag(alu_src0_neg))
instr->set_alu_flag(alu_src0_neg);
if (has_alu_flag(alu_src1_neg))
instr->set_alu_flag(alu_src1_neg);
if (has_alu_flag(alu_src2_neg))
instr->set_alu_flag(alu_src2_neg);
if (has_alu_flag(alu_src0_abs))
instr->set_alu_flag(alu_src0_abs);
if (has_alu_flag(alu_src1_abs))
instr->set_alu_flag(alu_src1_abs);
if (has_source_mod(nsrc * k + 0, mod_neg))
instr->set_source_mod(0, mod_neg);
if (has_source_mod(nsrc * k + 1, mod_neg))
instr->set_source_mod(1, mod_neg);
if (has_source_mod(nsrc * k + 2, mod_neg))
instr->set_source_mod(2, mod_neg);
if (has_source_mod(nsrc * k + 0, mod_abs))
instr->set_source_mod(0, mod_abs);
if (has_source_mod(nsrc * k + 1, mod_abs))
instr->set_source_mod(1, mod_abs);
}
if (has_alu_flag(alu_dst_clamp))
instr->set_alu_flag(alu_dst_clamp);
@ -1077,7 +1075,7 @@ AluInstr::from_string(istream& is, ValueFactory& value_factory, AluGroup *group,
int slots = 0;
uint32_t src_mods = 0;
SrcValues sources;
do {
++t;
@ -1085,19 +1083,13 @@ AluInstr::from_string(istream& is, ValueFactory& value_factory, AluGroup *group,
string srcstr = *t++;
if (srcstr[0] == '-') {
if (!slots)
flags.insert(AluInstr::src_neg_flags[i]);
else
assert(flags.find(AluInstr::src_neg_flags[i]) != flags.end());
src_mods |= AluInstr::mod_neg << (2 * sources.size());
srcstr = srcstr.substr(1);
}
if (srcstr[0] == '|') {
assert(srcstr[srcstr.length() - 1] == '|');
if (!slots)
flags.insert(AluInstr::src_abs_flags[i]);
else
assert(flags.find(AluInstr::src_abs_flags[i]) != flags.end());
src_mods |= AluInstr::mod_abs << (2 * sources.size());
srcstr = srcstr.substr(1, srcstr.length() - 2);
}
@ -1197,6 +1189,7 @@ AluInstr::from_string(istream& is, ValueFactory& value_factory, AluGroup *group,
else
retval = new AluInstr(op_descr.alu_opcode, dest, sources, flags, slots);
retval->m_source_modifiers = src_mods;
retval->set_bank_swizzle(bank_swizzle);
retval->set_cf_type(cf);
if (group) {
@ -1303,14 +1296,23 @@ bool AluInstr::is_kill() const
}
}
/* Optional modifier that emit_alu_op1() applies to each instruction it
 * creates; mod_none is that function's default. */
enum AluMods {
   mod_none = 0,       /* no modifier requested */
   mod_src0_abs = 1,   /* set AluInstr::mod_abs on source 0 */
   mod_src0_neg = 2,   /* set AluInstr::mod_neg on source 0 */
   mod_dest_clamp = 3, /* set the alu_dst_clamp flag on the instruction */
};
static bool
emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader);
static bool
emit_alu_op1(const nir_alu_instr& alu,
EAluOp opcode,
Shader& shader,
const AluOpFlags& flags = 0);
AluMods mod = mod_none);
static bool
emit_alu_op1_64bit(const nir_alu_instr& alu,
EAluOp opcode,
@ -1658,7 +1660,7 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
return emit_alu_op3(*alu, op3_cnde_int, shader, {0, 2, 1});
case nir_op_fabs:
return emit_alu_op1(*alu, op1_mov, shader, {1 << alu_src0_abs});
return emit_alu_op1(*alu, op1_mov, shader, mod_src0_abs);
case nir_op_fadd:
return emit_alu_op2(*alu, op2_add, shader);
case nir_op_fceil:
@ -1716,7 +1718,7 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
return emit_alu_op2(*alu, op2_mul, shader);
case nir_op_fneg:
return emit_alu_op1(*alu, op1_mov, shader, {1 << alu_src0_neg});
return emit_alu_op1(*alu, op1_mov, shader, mod_src0_neg);
case nir_op_fneu32:
return emit_alu_op2(*alu, op2_setne_dx10, shader);
case nir_op_fneu:
@ -1725,7 +1727,7 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
case nir_op_fround_even:
return emit_alu_op1(*alu, op1_rndne, shader);
case nir_op_fsat:
return emit_alu_op1(*alu, op1_mov, shader, {1 << alu_dst_clamp});
return emit_alu_op1(*alu, op1_mov, shader, mod_dest_clamp);
case nir_op_fsub:
return emit_alu_op2(*alu, op2_add, shader, op2_opt_neg_src1);
case nir_op_ftrunc:
@ -1939,7 +1941,7 @@ emit_alu_neg(const nir_alu_instr& alu, Shader& shader)
{alu_write});
group->add_instruction(ir);
}
ir->set_alu_flag(alu_src0_neg);
ir->set_source_mod(0, AluInstr::mod_neg);
}
if (ir)
ir->set_alu_flag(alu_last_instr);
@ -1963,7 +1965,7 @@ emit_alu_abs64(const nir_alu_instr& alu, Shader& shader)
value_factory.dest(alu.dest, 1, pin_chan),
value_factory.src64(alu.src[0], 0, 1),
AluInstr::last_write);
ir->set_alu_flag(alu_src0_abs);
ir->set_source_mod(0, AluInstr::mod_abs);
shader.emit_instruction(ir);
return true;
}
@ -2066,6 +2068,8 @@ emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader
value_factory.src64(alu.src[0], 0, 0),
i < 2 ? AluInstr::write : AluInstr::empty);
if (opcode == op1_sqrt_64)
ir->set_source_mod(0, AluInstr::mod_abs);
group->add_instruction(ir);
}
if (ir)
@ -2262,7 +2266,7 @@ static bool
emit_alu_op1(const nir_alu_instr& alu,
EAluOp opcode,
Shader& shader,
const AluOpFlags& flags)
AluMods mod)
{
auto& value_factory = shader.value_factory();
@ -2271,13 +2275,20 @@ emit_alu_op1(const nir_alu_instr& alu,
for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest); ++i) {
if (alu.dest.write_mask & (1 << i)) {
ir = new AluInstr(opcode,
value_factory.dest(alu.dest, i, pin),
value_factory.src(alu.src[0], i),
{alu_write});
if (flags.test(alu_dst_clamp) || alu.dest.saturate)
switch (mod) {
case mod_src0_abs:
ir->set_source_mod(0, AluInstr::mod_abs); break;
case mod_src0_neg:
ir->set_source_mod(0, AluInstr::mod_neg); break;
case mod_dest_clamp:
ir->set_alu_flag(alu_dst_clamp);
default:;
}
shader.emit_instruction(ir);
}
}
@ -2315,7 +2326,7 @@ emit_alu_op2(const nir_alu_instr& alu,
value_factory.src(*src1, i),
{alu_write});
if (src1_negate)
ir->set_alu_flag(alu_src1_neg);
ir->set_source_mod(1, AluInstr::mod_neg);
shader.emit_instruction(ir);
}
}
@ -2426,8 +2437,12 @@ emit_any_all_fcomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader
ir = new AluInstr(op1_max4, max_val, s, AluInstr::last_write, 4);
if (all)
ir->set_alu_flag(alu_src0_neg);
if (all) {
ir->set_source_mod(0, AluInstr::mod_neg);
ir->set_source_mod(1, AluInstr::mod_neg);
ir->set_source_mod(2, AluInstr::mod_neg);
ir->set_source_mod(3, AluInstr::mod_neg);
}
shader.emit_instruction(ir);
@ -2442,7 +2457,7 @@ emit_any_all_fcomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader
value_factory.inline_const(ALU_SRC_1, 0),
AluInstr::last_write);
if (all)
ir->set_alu_flag(alu_src1_neg);
ir->set_source_mod(1, AluInstr::mod_neg);
shader.emit_instruction(ir);
return true;

View file

@ -49,6 +49,13 @@ public:
op2_opt_abs_src0 = 1 << 2
};
/* Modifier bits that can be attached to a single source operand.
 * mod_abs and mod_neg use separate bits, so both can be set on the same
 * source at once; mod_none is the empty set. */
enum SourceMod {
   mod_none = 0,
   mod_abs = 1 << 0,
   mod_neg = 1 << 1
};
static constexpr const AluBankSwizzle bs[6] = {
alu_vec_012, alu_vec_021, alu_vec_120, alu_vec_102, alu_vec_201, alu_vec_210};
@ -192,6 +199,16 @@ public:
void inc_ar_uses() { ++m_num_ar_uses;}
auto num_ar_uses() const {return m_num_ar_uses;}
void set_source_mod(int src, SourceMod mod) {
m_source_modifiers |= mod << (2 * src);
}
auto has_source_mod(int src, SourceMod mod) const {
return (m_source_modifiers & (mod << (2 * src))) != 0;
}
void reset_source_mod(int src, SourceMod mod) {
m_source_modifiers &= ~(mod << (2 * src));
}
private:
friend class AluGroup;
@ -228,6 +245,7 @@ private:
AluGroup *m_parent_group{nullptr};
unsigned m_allowed_dest_mask{0xf};
unsigned m_num_ar_uses{0};
uint32_t m_source_modifiers{0};
};
class AluInstrVisitor : public InstrVisitor {

View file

@ -478,8 +478,8 @@ CopyPropFwdVisitor::propagate_to(RegisterVec4& value, Instr *instr)
return;
if ((parents[i]->opcode() != op1_mov) ||
parents[i]->has_alu_flag(alu_src0_neg) ||
parents[i]->has_alu_flag(alu_src0_abs) ||
parents[i]->has_source_mod(0, AluInstr::mod_neg) ||
parents[i]->has_source_mod(0, AluInstr::mod_abs) ||
parents[i]->has_alu_flag(alu_dst_clamp) ||
parents[i]->has_alu_flag(alu_src0_rel))
return;
@ -854,7 +854,8 @@ ReplaceConstSource::visit(AluInstr *alu)
if (alu->opcode() != op1_mov)
return;
if (alu->has_alu_flag(alu_src0_abs) || alu->has_alu_flag(alu_src0_neg))
if (alu->has_source_mod(0, AluInstr::mod_abs) ||
alu->has_source_mod(0, AluInstr::mod_neg))
return;
auto src = alu->psrc(0);

View file

@ -263,16 +263,14 @@ ReplacePredicate::visit(AluInstr *alu)
m_pred->set_op(new_op);
m_pred->set_sources(alu->sources());
if (alu->has_alu_flag(alu_src0_abs))
m_pred->set_alu_flag(alu_src0_abs);
if (alu->has_alu_flag(alu_src1_abs))
m_pred->set_alu_flag(alu_src1_abs);
std::array<AluInstr::SourceMod, 2> mods = { AluInstr::mod_abs, AluInstr::mod_neg };
if (alu->has_alu_flag(alu_src0_neg))
m_pred->set_alu_flag(alu_src0_neg);
if (alu->has_alu_flag(alu_src1_neg))
m_pred->set_alu_flag(alu_src1_neg);
for (int i = 0; i < 2; ++i) {
for (auto m : mods) {
if (alu->has_source_mod(i, m))
m_pred->set_source_mod(i, m);
}
}
success = true;
}

View file

@ -44,8 +44,8 @@ TEST_F(InstrTest, test_alu_uni_op_mov)
EXPECT_FALSE(alu.has_alu_flag(alu_last_instr));
EXPECT_FALSE(alu.end_group());
EXPECT_FALSE(alu.has_alu_flag(alu_op3));
EXPECT_FALSE(alu.has_alu_flag(alu_src0_abs));
EXPECT_FALSE(alu.has_alu_flag(alu_src0_neg));
EXPECT_FALSE(alu.has_source_mod(0, AluInstr::mod_abs));
EXPECT_FALSE(alu.has_source_mod(0, AluInstr::mod_neg));
EXPECT_EQ(alu.opcode(), op1_mov);
@ -69,11 +69,11 @@ TEST_F(InstrTest, test_alu_uni_op_mov)
EXPECT_FALSE(alu.psrc(1));
EXPECT_FALSE(alu.psrc(2));
alu.set_alu_flag(alu_src0_abs);
EXPECT_TRUE(alu.has_alu_flag(alu_src0_abs));
alu.set_source_mod(0, AluInstr::mod_abs);;
EXPECT_TRUE(alu.has_source_mod(0, AluInstr::mod_abs));
alu.set_alu_flag(alu_src0_neg);
EXPECT_TRUE(alu.has_alu_flag(alu_src0_neg));
alu.set_source_mod(0, AluInstr::mod_neg);
EXPECT_TRUE(alu.has_source_mod(0, AluInstr::mod_neg));
}
TEST_F(InstrTest, test_alu_op2)
@ -89,9 +89,9 @@ TEST_F(InstrTest, test_alu_op2)
EXPECT_TRUE(alu.has_alu_flag(alu_last_instr));
EXPECT_FALSE(alu.has_alu_flag(alu_op3));
EXPECT_FALSE(alu.has_alu_flag(alu_src0_neg));
EXPECT_FALSE(alu.has_alu_flag(alu_src1_neg));
EXPECT_FALSE(alu.has_alu_flag(alu_src2_neg));
EXPECT_FALSE(alu.has_source_mod(0, AluInstr::mod_neg));
EXPECT_FALSE(alu.has_source_mod(1, AluInstr::mod_neg));
EXPECT_FALSE(alu.has_source_mod(2, AluInstr::mod_neg));
EXPECT_FALSE(alu.has_alu_flag(alu_src0_rel));
EXPECT_FALSE(alu.has_alu_flag(alu_src1_rel));

View file

@ -72,7 +72,8 @@ TEST_F(TestInstrFromString, test_alu_mov_neg)
AluInstr expect(op1_mov,
new Register(2000, 1, pin_none),
new Register(1999, 0, pin_none),
{alu_write, alu_last_instr, alu_src0_neg});
{alu_write, alu_last_instr});
expect.set_source_mod(0, AluInstr::mod_neg);
check("ALU MOV R2000.y : -R1999.x {WL}", expect);
}
@ -83,7 +84,8 @@ TEST_F(TestInstrFromString, test_alu_mov_abs)
AluInstr expect(op1_mov,
new Register(2000, 1, pin_none),
new Register(1999, 0, pin_none),
{alu_write, alu_last_instr, alu_src0_abs});
{alu_write, alu_last_instr});
expect.set_source_mod(0, AluInstr::mod_abs);
check("ALU MOV R2000.y : |R1999.x| {WL}", expect);
}
@ -94,7 +96,10 @@ TEST_F(TestInstrFromString, test_alu_mov_neg_abs)
AluInstr expect(op1_mov,
new Register(2000, 1, pin_none),
new Register(1999, 0, pin_none),
{alu_write, alu_src0_neg, alu_src0_abs});
{alu_write});
expect.set_source_mod(0, AluInstr::mod_abs);
expect.set_source_mod(0, AluInstr::mod_neg);
check("ALU MOV R2000.y : -|R1999.x| {W}", expect);
}
@ -131,7 +136,9 @@ TEST_F(TestInstrFromString, test_alu_add_neg2)
new Register(2000, 1, pin_none),
new Register(1999, 3, pin_none),
new Register(1998, 2, pin_none),
{alu_last_instr, alu_src1_neg});
{alu_last_instr});
expect.set_source_mod(1, AluInstr::mod_neg);
check("ALU ADD __.y : R1999.w -R1998.z {L}", expect);
}
@ -143,7 +150,8 @@ TEST_F(TestInstrFromString, test_alu_sete_update_pref)
new Register(2000, 1, pin_none),
new Register(1999, 3, pin_none),
new Register(1998, 2, pin_none),
{alu_last_instr, alu_src1_neg, alu_update_pred});
{alu_last_instr, alu_update_pred});
expect.set_source_mod(1, AluInstr::mod_neg);
check("ALU SETE __.y : R1999.w -R1998.z {LP}", expect);
}
@ -167,7 +175,8 @@ TEST_F(TestInstrFromString, test_alu_setne_update_exec)
new Register(2000, 1, pin_none),
new Register(1999, 3, pin_none),
new Register(1998, 2, pin_none),
{alu_last_instr, alu_src1_neg, alu_update_exec});
{alu_last_instr, alu_update_exec});
expect.set_source_mod(1, AluInstr::mod_neg);
check("ALU SETNE __.y : R1999.w -R1998.z {LE}", expect);
}
@ -179,7 +188,8 @@ TEST_F(TestInstrFromString, test_alu_add_abs2)
new Register(2000, 1, pin_none),
new Register(1999, 3, pin_none),
new Register(1998, 2, pin_none),
{alu_write, alu_last_instr, alu_src1_abs});
{alu_write, alu_last_instr});
expect.set_source_mod(1, AluInstr::mod_abs);
check("ALU ADD R2000.y : R1999.w |R1998.z| {WL}", expect);
}
@ -191,7 +201,10 @@ TEST_F(TestInstrFromString, test_alu_add_abs2_neg2)
new Register(2000, 1, pin_none),
new Register(1999, 3, pin_none),
new Register(1998, 2, pin_none),
{alu_write, alu_last_instr, alu_src1_abs, alu_src1_neg});
{alu_write, alu_last_instr});
expect.set_source_mod(1, AluInstr::mod_neg);
expect.set_source_mod(1, AluInstr::mod_abs);
check("ALU ADD R2000.y : R1999.w -|R1998.z| {WL}", expect);
}
@ -219,7 +232,7 @@ TEST_F(TestInstrFromString, test_alu_muladd_neg3)
new Register(1999, 3, pin_none),
new Register(1998, 2, pin_none),
new Register(2000, 1, pin_none),
{alu_last_instr, alu_src2_neg});
{alu_last_instr});
check("ALU MULADD_IEEE __.y : R1999.w R1998.z -R2000.y {L}", expect);
}
@ -268,6 +281,48 @@ TEST_F(TestInstrFromString, test_alu_dot4_ieee)
check(init, expect);
}
/* Parse a four-slot DOT4_IEEE whose sources carry different neg/abs
 * modifiers, compare the result with a hand-built instruction, and check
 * that printing the parsed instruction reproduces the input string. */
TEST_F(TestInstrFromString, test_alu_dot4_with_mods)
{
   /* Registers referenced as sources must be known before parsing. */
   for (const char *reg : {"R199.x", "R199.y", "R199.z", "R199.w",
                           "R198.x", "R198.y", "R198.z", "R198.w"})
      add_dest_from_string(reg);

   const std::string init("ALU DOT4_IEEE R2000.y : -R199.x R198.w + R199.y |R198.z| + "
                          "-|R199.z| R198.y + -R199.w R198.x {WL}");

   AluInstr expect(op2_dot4_ieee,
                   new Register(2000, 1, pin_none),
                   {new Register(199, 0, pin_none),
                    new Register(198, 3, pin_none),
                    new Register(199, 1, pin_none),
                    new Register(198, 2, pin_none),
                    new Register(199, 2, pin_none),
                    new Register(198, 1, pin_none),
                    new Register(199, 3, pin_none),
                    new Register(198, 0, pin_none)},
                   {alu_write, alu_last_instr},
                   4);

   /* Source indices run across all slots (two sources per slot), so
    * index 3 is the second source of slot 1, index 4 the first of slot 2. */
   const struct {
      int src;
      AluInstr::SourceMod mod;
   } expected_mods[] = {
      {0, AluInstr::mod_neg},
      {3, AluInstr::mod_abs},
      {4, AluInstr::mod_neg},
      {4, AluInstr::mod_abs},
      {7, AluInstr::mod_neg},
   };
   for (const auto& m : expected_mods)
      expect.set_source_mod(m.src, m.mod);

   check(init, expect);

   /* Round trip: the textual form of the parsed instruction must match. */
   auto parsed = from_string(init);
   std::ostringstream printed;
   printed << *parsed;
   EXPECT_EQ(printed.str(), init);
}
TEST_F(TestInstrFromString, test_alu_mov_cf)
{
add_dest_from_string("R1999.x");