r600/sfn: move handling of legacy math rules to assembler

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20451>
This commit is contained in:
Gert Wollny 2022-12-22 13:11:19 +01:00 committed by Marge Bot
parent 46a493a2f0
commit e2eb7cd3d8
2 changed files with 38 additions and 20 deletions

View file

@ -48,7 +48,7 @@ extern const std::map<ESDOp, int> ds_opcode_map;
class AssamblerVisitor : public ConstInstrVisitor {
public:
AssamblerVisitor(r600_shader *sh, const r600_shader_key& key);
AssamblerVisitor(r600_shader *sh, const r600_shader_key& key, bool legacy_math_rules);
void visit(const AluInstr& instr) override;
void visit(const AluGroup& instr) override;
@ -92,6 +92,8 @@ public:
void emit_alu_op(const AluInstr& ai);
void emit_lds_op(const AluInstr& lds);
auto translate_for_mathrules(EAluOp op) -> EAluOp;
void emit_wait_ack();
/* Start initialized in constructor */
@ -118,12 +120,13 @@ public:
bool m_has_pos_output{false};
bool m_last_op_was_barrier{false};
bool m_result{true};
bool m_legacy_math_rules{false};
};
bool
Assembler::lower(Shader *shader)
{
AssamblerVisitor ass(m_sh, m_key);
AssamblerVisitor ass(m_sh, m_key, shader->has_flag(Shader::sh_legacy_math_rules));
auto& blocks = shader->func();
for (auto b : blocks) {
@ -137,13 +140,15 @@ Assembler::lower(Shader *shader)
return ass.m_result;
}
AssamblerVisitor::AssamblerVisitor(r600_shader *sh, const r600_shader_key& key):
AssamblerVisitor::AssamblerVisitor(r600_shader *sh, const r600_shader_key& key,
bool legacy_math_rules):
m_key(key),
m_shader(sh),
m_bc(&sh->bc),
m_callstack(sh->bc),
ps_alpha_to_one(key.ps.alpha_to_one)
ps_alpha_to_one(key.ps.alpha_to_one),
m_legacy_math_rules(legacy_math_rules)
{
if (m_shader->processor_type == PIPE_SHADER_FRAGMENT)
m_max_color_exports = MAX2(m_key.ps.nr_cbufs, 1);
@ -260,25 +265,44 @@ AssamblerVisitor::emit_lds_op(const AluInstr& lds)
m_result = false;
}
auto AssamblerVisitor::translate_for_mathrules(EAluOp op) -> EAluOp
{
switch (op) {
case op2_dot_ieee: return op2_dot;
case op2_dot4_ieee: return op2_dot4;
case op2_mul_ieee: return op2_mul;
case op3_muladd_ieee : return op2_mul_ieee;
default:
return op;
}
}
void
AssamblerVisitor::emit_alu_op(const AluInstr& ai)
{
struct r600_bytecode_alu alu;
memset(&alu, 0, sizeof(alu));
if (opcode_map.find(ai.opcode()) == opcode_map.end()) {
auto opcode = ai.opcode();
if (m_legacy_math_rules)
opcode = translate_for_mathrules(opcode);
auto hw_opcode = opcode_map.find(opcode);
if (hw_opcode == opcode_map.end()) {
std::cerr << "Opcode not handled for " << ai << "\n";
m_result = false;
return;
}
// skip multiple barriers
if (m_last_op_was_barrier && ai.opcode() == op0_group_barrier)
if (m_last_op_was_barrier && opcode == op0_group_barrier)
return;
m_last_op_was_barrier = ai.opcode() == op0_group_barrier;
m_last_op_was_barrier = opcode == op0_group_barrier;
alu.op = opcode_map.at(ai.opcode());
alu.op = hw_opcode->second;
auto dst = ai.dest();
if (dst) {

View file

@ -500,7 +500,7 @@ AluInstr::allowed_dest_chan_mask() const
if (has_alu_flag(alu_is_cayman_trans))
return (1 << alu_slots()) - 1;
if (m_opcode == op2_dot_ieee || m_opcode == op2_dot) {
if (m_opcode == op2_dot_ieee) {
return (1 << (4 - alu_slots())) - 1;
}
}
@ -722,12 +722,12 @@ AluInstr::split(ValueFactory& vf)
m_dest->del_parent(this);
int start_slot = 0;
bool is_dot = m_opcode == op2_dot || opcode() == op2_dot_ieee;
bool is_dot = m_opcode == op2_dot_ieee;
auto last_opcode = m_opcode;
if (is_dot) {
start_slot = m_dest->chan();
last_opcode = m_opcode == op2_dot ? op2_mul : op2_mul_ieee;
last_opcode = op2_mul_ieee;
}
@ -2515,9 +2515,7 @@ emit_dot(const nir_alu_instr& alu, int n, Shader& shader)
srcs[2 * i + 1] = value_factory.src(src1, i);
}
auto op =
unlikely(shader.has_flag(Shader::sh_legacy_math_rules)) ? op2_dot : op2_dot_ieee;
AluInstr *ir = new AluInstr(op, dest, srcs, AluInstr::last_write, n);
AluInstr *ir = new AluInstr(op2_dot_ieee, dest, srcs, AluInstr::last_write, n);
if (src0.negate)
ir->set_alu_flag(alu_src0_neg);
@ -2556,9 +2554,7 @@ emit_dot4(const nir_alu_instr& alu, int nelm, Shader& shader)
srcs[2 * i + 1] = value_factory.zero();
}
auto op =
unlikely(shader.has_flag(Shader::sh_legacy_math_rules)) ? op2_dot4 : op2_dot4_ieee;
AluInstr *ir = new AluInstr(op, dest, srcs, AluInstr::last_write, 4);
AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4);
if (src0.negate)
ir->set_alu_flag(alu_src0_neg);
@ -2595,9 +2591,7 @@ emit_fdph(const nir_alu_instr& alu, Shader& shader)
srcs[6] = value_factory.one();
srcs[7] = value_factory.src(src1, 3);
auto op =
unlikely(shader.has_flag(Shader::sh_legacy_math_rules)) ? op2_dot4 : op2_dot4_ieee;
AluInstr *ir = new AluInstr(op, dest, srcs, AluInstr::last_write, 4);
AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4);
if (src0.negate)
ir->set_alu_flag(alu_src0_neg);