broadcom/compiler: enable ldvary pipelining on v71

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25450>
This commit is contained in:
Iago Toral Quiroga 2021-10-27 11:35:12 +02:00 committed by Marge Bot
parent a8014be2b0
commit d8a25bdb07

View file

@ -2312,46 +2312,72 @@ emit_branch(struct v3d_compile *c,
}
static bool
alu_reads_register(struct v3d_qpu_instr *inst,
alu_reads_register(const struct v3d_device_info *devinfo,
struct v3d_qpu_instr *inst,
bool add, bool magic, uint32_t index)
{
uint32_t num_src;
enum v3d_qpu_mux mux_a, mux_b;
if (add) {
if (add)
num_src = v3d_qpu_add_op_num_src(inst->alu.add.op);
mux_a = inst->alu.add.a.mux;
mux_b = inst->alu.add.b.mux;
} else {
else
num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
mux_a = inst->alu.mul.a.mux;
mux_b = inst->alu.mul.b.mux;
if (devinfo->ver <= 42) {
enum v3d_qpu_mux mux_a, mux_b;
if (add) {
mux_a = inst->alu.add.a.mux;
mux_b = inst->alu.add.b.mux;
} else {
mux_a = inst->alu.mul.a.mux;
mux_b = inst->alu.mul.b.mux;
}
for (int i = 0; i < num_src; i++) {
if (magic) {
if (i == 0 && mux_a == index)
return true;
if (i == 1 && mux_b == index)
return true;
} else {
if (i == 0 && mux_a == V3D_QPU_MUX_A &&
inst->raddr_a == index) {
return true;
}
if (i == 0 && mux_a == V3D_QPU_MUX_B &&
inst->raddr_b == index) {
return true;
}
if (i == 1 && mux_b == V3D_QPU_MUX_A &&
inst->raddr_a == index) {
return true;
}
if (i == 1 && mux_b == V3D_QPU_MUX_B &&
inst->raddr_b == index) {
return true;
}
}
}
return false;
}
assert(devinfo->ver >= 71);
assert(!magic);
uint32_t raddr_a, raddr_b;
if (add) {
raddr_a = inst->alu.add.a.raddr;
raddr_b = inst->alu.add.b.raddr;
} else {
raddr_a = inst->alu.mul.a.raddr;
raddr_b = inst->alu.mul.b.raddr;
}
for (int i = 0; i < num_src; i++) {
if (magic) {
if (i == 0 && mux_a == index)
return true;
if (i == 1 && mux_b == index)
return true;
} else {
if (i == 0 && mux_a == V3D_QPU_MUX_A &&
inst->raddr_a == index) {
return true;
}
if (i == 0 && mux_a == V3D_QPU_MUX_B &&
inst->raddr_b == index) {
return true;
}
if (i == 1 && mux_b == V3D_QPU_MUX_A &&
inst->raddr_a == index) {
return true;
}
if (i == 1 && mux_b == V3D_QPU_MUX_B &&
inst->raddr_b == index) {
return true;
}
}
if (i == 0 && raddr_a == index)
return true;
if (i == 1 && raddr_b == index)
return true;
}
return false;
@ -2386,6 +2412,8 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
struct qblock *block,
struct v3d_qpu_instr *inst)
{
const struct v3d_device_info *devinfo = c->devinfo;
/* We only call this if we have successfully merged an ldvary into a
* previous instruction.
*/
@ -2398,9 +2426,9 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
* the ldvary destination, if it does, then moving the ldvary before
* it would overwrite it.
*/
if (alu_reads_register(inst, true, ldvary_magic, ldvary_index))
if (alu_reads_register(devinfo, inst, true, ldvary_magic, ldvary_index))
return false;
if (alu_reads_register(inst, false, ldvary_magic, ldvary_index))
if (alu_reads_register(devinfo, inst, false, ldvary_magic, ldvary_index))
return false;
/* The implicit ldvary destination may not be written to by a signal
@ -2436,13 +2464,13 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
}
/* The previous instruction cannot have a conflicting signal */
if (v3d_qpu_sig_writes_address(c->devinfo, &prev->qpu.sig))
if (v3d_qpu_sig_writes_address(devinfo, &prev->qpu.sig))
return false;
uint32_t sig;
struct v3d_qpu_sig new_sig = prev->qpu.sig;
new_sig.ldvary = true;
if (!v3d_qpu_sig_pack(c->devinfo, &new_sig, &sig))
if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig))
return false;
/* The previous instruction cannot use flags since ldvary uses the
@ -2471,14 +2499,25 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
inst->sig_magic = false;
inst->sig_addr = 0;
/* By moving ldvary to the previous instruction we make it update
* r5 in the current one, so nothing else in it should write r5.
* This should've been prevented by our dependency tracking, which
/* Update rf0 flops tracking for new ldvary delayed rf0 write tick */
if (devinfo->ver >= 71) {
scoreboard->last_implicit_rf0_write_tick = scoreboard->tick;
set_has_rf0_flops_conflict(scoreboard, inst, devinfo);
}
/* By moving ldvary to the previous instruction we make it update r5
* (rf0 for ver >= 71) in the current one, so nothing else in it
* should write this register.
*
* This should've been prevented by our dependency tracking, which
* would not allow ldvary to be paired up with an instruction that
* writes r5 (since our dependency tracking doesn't know that the
* ldvary write r5 happens in the next instruction).
* writes r5/rf0 (since our dependency tracking doesn't know that the
* ldvary write to r5/rf0 happens in the next instruction).
*/
assert(!v3d_qpu_writes_r5(c->devinfo, inst));
assert(!v3d_qpu_writes_r5(devinfo, inst));
assert(devinfo->ver <= 42 ||
(!v3d_qpu_writes_rf0_implicitly(devinfo, inst) &&
!v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0)));
return true;
}