r600/sfn: when emitting fp64 op2 groups pre-load values

Since the group is created from the outset, we have to make
sure that four or eight src values don't have a readport
conflict, so force a pre-loading of the values to registers
evenly distributed over the channels and let copy-propagation
take care of cleaning up unnecessary moves.

Fixes: 79ca456b48
   r600/sfn: rewrite NIR backend

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28840>
(cherry picked from commit 07995b98a8)
This commit is contained in:
Gert Wollny 2024-04-20 21:58:44 +02:00 committed by Eric Engestrom
parent 93ce419991
commit fe5147ae49
4 changed files with 18 additions and 9 deletions

View file

@ -14,7 +14,7 @@
"description": "r600/sfn: when emitting fp64 op2 groups pre-load values",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "79ca456b4837b3bc21cf9ef3c03c505c4b4909f6",
"notes": null

View file

@ -2101,6 +2101,14 @@ emit_alu_op2_64bit(const nir_alu_instr& alu,
int num_emit0 = opcode == op2_mul_64 ? 3 : 1;
std::array<std::array<PRegister, 4>,2> tmp;
for (unsigned k = 0; k < alu.def.num_components; ++k) {
tmp[k][0] = shader.emit_load_to_register(value_factory.src64(alu.src[order[0]], k, 1), 0);
tmp[k][1] = shader.emit_load_to_register(value_factory.src64(alu.src[order[1]], k, 1), 1);
tmp[k][2] = shader.emit_load_to_register(value_factory.src64(alu.src[order[0]], k, 0), 2);
tmp[k][3] = shader.emit_load_to_register(value_factory.src64(alu.src[order[1]], k, 0), 3);
}
assert(num_emit0 == 1 || alu.def.num_components == 1);
for (unsigned k = 0; k < alu.def.num_components; ++k) {
@ -2111,8 +2119,8 @@ emit_alu_op2_64bit(const nir_alu_instr& alu,
ir = new AluInstr(opcode,
dest,
value_factory.src64(alu.src[order[0]], k, 1),
value_factory.src64(alu.src[order[1]], k, 1),
tmp[k][0],
tmp[k][1],
i < 2 ? AluInstr::write : AluInstr::empty);
group->add_instruction(ir);
}
@ -2122,8 +2130,8 @@ emit_alu_op2_64bit(const nir_alu_instr& alu,
ir = new AluInstr(opcode,
dest,
value_factory.src64(alu.src[order[0]], k, 0),
value_factory.src64(alu.src[order[1]], k, 0),
tmp[k][2],
tmp[k][3],
i == 1 ? AluInstr::write : AluInstr::empty);
group->add_instruction(ir);
}

View file

@ -938,13 +938,14 @@ lds_op_from_intrinsic(nir_atomic_op op, bool ret)
}
PRegister
Shader::emit_load_to_register(PVirtualValue src)
Shader::emit_load_to_register(PVirtualValue src, int chan)
{
assert(src);
PRegister dest = src->as_register();
if (!dest) {
dest = value_factory().temp_register();
if (!dest || chan >= 0) {
dest = value_factory().temp_register(chan);
dest->set_pin(pin_free);
emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::last_write));
}
return dest;

View file

@ -261,7 +261,7 @@ public:
return m_rat_return_address;
}
PRegister emit_load_to_register(PVirtualValue src);
PRegister emit_load_to_register(PVirtualValue src, int chan = -1);
virtual unsigned image_size_const_offset() { return 0;}