broadcom/compiler: enable ldvary pipelining on v71

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25450>
This commit is contained in:
Iago Toral Quiroga 2021-10-27 11:35:12 +02:00 committed by Marge Bot
parent a8014be2b0
commit d8a25bdb07

View file

@ -2312,46 +2312,72 @@ emit_branch(struct v3d_compile *c,
} }
static bool static bool
alu_reads_register(struct v3d_qpu_instr *inst, alu_reads_register(const struct v3d_device_info *devinfo,
struct v3d_qpu_instr *inst,
bool add, bool magic, uint32_t index) bool add, bool magic, uint32_t index)
{ {
uint32_t num_src; uint32_t num_src;
enum v3d_qpu_mux mux_a, mux_b; if (add)
if (add) {
num_src = v3d_qpu_add_op_num_src(inst->alu.add.op); num_src = v3d_qpu_add_op_num_src(inst->alu.add.op);
mux_a = inst->alu.add.a.mux; else
mux_b = inst->alu.add.b.mux;
} else {
num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op); num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
mux_a = inst->alu.mul.a.mux;
mux_b = inst->alu.mul.b.mux; if (devinfo->ver <= 42) {
enum v3d_qpu_mux mux_a, mux_b;
if (add) {
mux_a = inst->alu.add.a.mux;
mux_b = inst->alu.add.b.mux;
} else {
mux_a = inst->alu.mul.a.mux;
mux_b = inst->alu.mul.b.mux;
}
for (int i = 0; i < num_src; i++) {
if (magic) {
if (i == 0 && mux_a == index)
return true;
if (i == 1 && mux_b == index)
return true;
} else {
if (i == 0 && mux_a == V3D_QPU_MUX_A &&
inst->raddr_a == index) {
return true;
}
if (i == 0 && mux_a == V3D_QPU_MUX_B &&
inst->raddr_b == index) {
return true;
}
if (i == 1 && mux_b == V3D_QPU_MUX_A &&
inst->raddr_a == index) {
return true;
}
if (i == 1 && mux_b == V3D_QPU_MUX_B &&
inst->raddr_b == index) {
return true;
}
}
}
return false;
}
assert(devinfo->ver >= 71);
assert(!magic);
uint32_t raddr_a, raddr_b;
if (add) {
raddr_a = inst->alu.add.a.raddr;
raddr_b = inst->alu.add.b.raddr;
} else {
raddr_a = inst->alu.mul.a.raddr;
raddr_b = inst->alu.mul.b.raddr;
} }
for (int i = 0; i < num_src; i++) { for (int i = 0; i < num_src; i++) {
if (magic) { if (i == 0 && raddr_a == index)
if (i == 0 && mux_a == index) return true;
return true; if (i == 1 && raddr_b == index)
if (i == 1 && mux_b == index) return true;
return true;
} else {
if (i == 0 && mux_a == V3D_QPU_MUX_A &&
inst->raddr_a == index) {
return true;
}
if (i == 0 && mux_a == V3D_QPU_MUX_B &&
inst->raddr_b == index) {
return true;
}
if (i == 1 && mux_b == V3D_QPU_MUX_A &&
inst->raddr_a == index) {
return true;
}
if (i == 1 && mux_b == V3D_QPU_MUX_B &&
inst->raddr_b == index) {
return true;
}
}
} }
return false; return false;
@ -2386,6 +2412,8 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
struct qblock *block, struct qblock *block,
struct v3d_qpu_instr *inst) struct v3d_qpu_instr *inst)
{ {
const struct v3d_device_info *devinfo = c->devinfo;
/* We only call this if we have successfully merged an ldvary into a /* We only call this if we have successfully merged an ldvary into a
* previous instruction. * previous instruction.
*/ */
@ -2398,9 +2426,9 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
* the ldvary destination, if it does, then moving the ldvary before * the ldvary destination, if it does, then moving the ldvary before
* it would overwrite it. * it would overwrite it.
*/ */
if (alu_reads_register(inst, true, ldvary_magic, ldvary_index)) if (alu_reads_register(devinfo, inst, true, ldvary_magic, ldvary_index))
return false; return false;
if (alu_reads_register(inst, false, ldvary_magic, ldvary_index)) if (alu_reads_register(devinfo, inst, false, ldvary_magic, ldvary_index))
return false; return false;
/* The implicit ldvary destination may not be written to by a signal /* The implicit ldvary destination may not be written to by a signal
@ -2436,13 +2464,13 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
} }
/* The previous instruction cannot have a conflicting signal */ /* The previous instruction cannot have a conflicting signal */
if (v3d_qpu_sig_writes_address(c->devinfo, &prev->qpu.sig)) if (v3d_qpu_sig_writes_address(devinfo, &prev->qpu.sig))
return false; return false;
uint32_t sig; uint32_t sig;
struct v3d_qpu_sig new_sig = prev->qpu.sig; struct v3d_qpu_sig new_sig = prev->qpu.sig;
new_sig.ldvary = true; new_sig.ldvary = true;
if (!v3d_qpu_sig_pack(c->devinfo, &new_sig, &sig)) if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig))
return false; return false;
/* The previous instruction cannot use flags since ldvary uses the /* The previous instruction cannot use flags since ldvary uses the
@ -2471,14 +2499,25 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
inst->sig_magic = false; inst->sig_magic = false;
inst->sig_addr = 0; inst->sig_addr = 0;
/* By moving ldvary to the previous instruction we make it update /* Update rf0 flops tracking for new ldvary delayed rf0 write tick */
* r5 in the current one, so nothing else in it should write r5. if (devinfo->ver >= 71) {
* This should've been prevented by our dependency tracking, which scoreboard->last_implicit_rf0_write_tick = scoreboard->tick;
set_has_rf0_flops_conflict(scoreboard, inst, devinfo);
}
/* By moving ldvary to the previous instruction we make it update r5
* (rf0 for ver >= 71) in the current one, so nothing else in it
* should write this register.
*
* This should've been prevented by our dependency tracking, which
* would not allow ldvary to be paired up with an instruction that * would not allow ldvary to be paired up with an instruction that
* writes r5 (since our dependency tracking doesn't know that the * writes r5/rf0 (since our dependency tracking doesn't know that the
* ldvary write r5 happens in the next instruction). * ldvary write to r5/rf0 happens in the next instruction).
*/ */
assert(!v3d_qpu_writes_r5(c->devinfo, inst)); assert(!v3d_qpu_writes_r5(devinfo, inst));
assert(devinfo->ver <= 42 ||
(!v3d_qpu_writes_rf0_implicitly(devinfo, inst) &&
!v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0)));
return true; return true;
} }