mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-31 01:10:16 +01:00
broadcom/compiler: enable ldvary pipelining on v71
Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25450>
This commit is contained in:
parent
a8014be2b0
commit
d8a25bdb07
1 changed files with 81 additions and 42 deletions
|
|
@ -2312,46 +2312,72 @@ emit_branch(struct v3d_compile *c,
|
|||
}
|
||||
|
||||
static bool
|
||||
alu_reads_register(struct v3d_qpu_instr *inst,
|
||||
alu_reads_register(const struct v3d_device_info *devinfo,
|
||||
struct v3d_qpu_instr *inst,
|
||||
bool add, bool magic, uint32_t index)
|
||||
{
|
||||
uint32_t num_src;
|
||||
enum v3d_qpu_mux mux_a, mux_b;
|
||||
|
||||
if (add) {
|
||||
if (add)
|
||||
num_src = v3d_qpu_add_op_num_src(inst->alu.add.op);
|
||||
mux_a = inst->alu.add.a.mux;
|
||||
mux_b = inst->alu.add.b.mux;
|
||||
} else {
|
||||
else
|
||||
num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
|
||||
mux_a = inst->alu.mul.a.mux;
|
||||
mux_b = inst->alu.mul.b.mux;
|
||||
|
||||
if (devinfo->ver <= 42) {
|
||||
enum v3d_qpu_mux mux_a, mux_b;
|
||||
if (add) {
|
||||
mux_a = inst->alu.add.a.mux;
|
||||
mux_b = inst->alu.add.b.mux;
|
||||
} else {
|
||||
mux_a = inst->alu.mul.a.mux;
|
||||
mux_b = inst->alu.mul.b.mux;
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_src; i++) {
|
||||
if (magic) {
|
||||
if (i == 0 && mux_a == index)
|
||||
return true;
|
||||
if (i == 1 && mux_b == index)
|
||||
return true;
|
||||
} else {
|
||||
if (i == 0 && mux_a == V3D_QPU_MUX_A &&
|
||||
inst->raddr_a == index) {
|
||||
return true;
|
||||
}
|
||||
if (i == 0 && mux_a == V3D_QPU_MUX_B &&
|
||||
inst->raddr_b == index) {
|
||||
return true;
|
||||
}
|
||||
if (i == 1 && mux_b == V3D_QPU_MUX_A &&
|
||||
inst->raddr_a == index) {
|
||||
return true;
|
||||
}
|
||||
if (i == 1 && mux_b == V3D_QPU_MUX_B &&
|
||||
inst->raddr_b == index) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
assert(devinfo->ver >= 71);
|
||||
assert(!magic);
|
||||
|
||||
uint32_t raddr_a, raddr_b;
|
||||
if (add) {
|
||||
raddr_a = inst->alu.add.a.raddr;
|
||||
raddr_b = inst->alu.add.b.raddr;
|
||||
} else {
|
||||
raddr_a = inst->alu.mul.a.raddr;
|
||||
raddr_b = inst->alu.mul.b.raddr;
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_src; i++) {
|
||||
if (magic) {
|
||||
if (i == 0 && mux_a == index)
|
||||
return true;
|
||||
if (i == 1 && mux_b == index)
|
||||
return true;
|
||||
} else {
|
||||
if (i == 0 && mux_a == V3D_QPU_MUX_A &&
|
||||
inst->raddr_a == index) {
|
||||
return true;
|
||||
}
|
||||
if (i == 0 && mux_a == V3D_QPU_MUX_B &&
|
||||
inst->raddr_b == index) {
|
||||
return true;
|
||||
}
|
||||
if (i == 1 && mux_b == V3D_QPU_MUX_A &&
|
||||
inst->raddr_a == index) {
|
||||
return true;
|
||||
}
|
||||
if (i == 1 && mux_b == V3D_QPU_MUX_B &&
|
||||
inst->raddr_b == index) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (i == 0 && raddr_a == index)
|
||||
return true;
|
||||
if (i == 1 && raddr_b == index)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
@ -2386,6 +2412,8 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
|
|||
struct qblock *block,
|
||||
struct v3d_qpu_instr *inst)
|
||||
{
|
||||
const struct v3d_device_info *devinfo = c->devinfo;
|
||||
|
||||
/* We only call this if we have successfully merged an ldvary into a
|
||||
* previous instruction.
|
||||
*/
|
||||
|
|
@ -2398,9 +2426,9 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
|
|||
* the ldvary destination, if it does, then moving the ldvary before
|
||||
* it would overwrite it.
|
||||
*/
|
||||
if (alu_reads_register(inst, true, ldvary_magic, ldvary_index))
|
||||
if (alu_reads_register(devinfo, inst, true, ldvary_magic, ldvary_index))
|
||||
return false;
|
||||
if (alu_reads_register(inst, false, ldvary_magic, ldvary_index))
|
||||
if (alu_reads_register(devinfo, inst, false, ldvary_magic, ldvary_index))
|
||||
return false;
|
||||
|
||||
/* The implicit ldvary destination may not be written to by a signal
|
||||
|
|
@ -2436,13 +2464,13 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
|
|||
}
|
||||
|
||||
/* The previous instruction cannot have a conflicting signal */
|
||||
if (v3d_qpu_sig_writes_address(c->devinfo, &prev->qpu.sig))
|
||||
if (v3d_qpu_sig_writes_address(devinfo, &prev->qpu.sig))
|
||||
return false;
|
||||
|
||||
uint32_t sig;
|
||||
struct v3d_qpu_sig new_sig = prev->qpu.sig;
|
||||
new_sig.ldvary = true;
|
||||
if (!v3d_qpu_sig_pack(c->devinfo, &new_sig, &sig))
|
||||
if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig))
|
||||
return false;
|
||||
|
||||
/* The previous instruction cannot use flags since ldvary uses the
|
||||
|
|
@ -2471,14 +2499,25 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
|
|||
inst->sig_magic = false;
|
||||
inst->sig_addr = 0;
|
||||
|
||||
/* By moving ldvary to the previous instruction we make it update
|
||||
* r5 in the current one, so nothing else in it should write r5.
|
||||
* This should've been prevented by our dependency tracking, which
|
||||
/* Update rf0 flops tracking for new ldvary delayed rf0 write tick */
|
||||
if (devinfo->ver >= 71) {
|
||||
scoreboard->last_implicit_rf0_write_tick = scoreboard->tick;
|
||||
set_has_rf0_flops_conflict(scoreboard, inst, devinfo);
|
||||
}
|
||||
|
||||
/* By moving ldvary to the previous instruction we make it update r5
|
||||
* (rf0 for ver >= 71) in the current one, so nothing else in it
|
||||
* should write this register.
|
||||
*
|
||||
* This should've been prevented by our depedency tracking, which
|
||||
* would not allow ldvary to be paired up with an instruction that
|
||||
* writes r5 (since our dependency tracking doesn't know that the
|
||||
* ldvary write r5 happens in the next instruction).
|
||||
* writes r5/rf0 (since our dependency tracking doesn't know that the
|
||||
* ldvary write to r5/rf0 happens in the next instruction).
|
||||
*/
|
||||
assert(!v3d_qpu_writes_r5(c->devinfo, inst));
|
||||
assert(!v3d_qpu_writes_r5(devinfo, inst));
|
||||
assert(devinfo->ver <= 42 ||
|
||||
(!v3d_qpu_writes_rf0_implicitly(devinfo, inst) &&
|
||||
!v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0)));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue