From 22a979be6516da89d1d1b3c0a8923c5d236b1abd Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Thu, 18 Mar 2021 13:03:01 +0100 Subject: [PATCH] broadcom/compiler: convert add to mul when possible to allow merge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Integer add/sub can be implemented as either an add or a mul instruction but we always emit them as add instructions at VIR level. We can use this flexibility to improve our QPU scheduling so we can be more effective at instruction merging by converting these to mul instructions when we are attempting to merge them with another add instruction. total instructions in shared programs: 13721549 -> 13691004 (-0.22%) instructions in affected programs: 3340493 -> 3309948 (-0.91%) helped: 12805 HURT: 1656 Instructions are helped. total max-temps in shared programs: 2319528 -> 2319317 (<.01%) max-temps in affected programs: 5285 -> 5074 (-3.99%) helped: 195 HURT: 3 Max-temps are helped. total sfu-stalls in shared programs: 31616 -> 31752 (0.43%) sfu-stalls in affected programs: 469 -> 605 (29.00%) helped: 52 HURT: 161 Sfu-stalls are HURT. total inst-and-stalls in shared programs: 13753165 -> 13722756 (-0.22%) inst-and-stalls in affected programs: 3340383 -> 3309974 (-0.91%) helped: 12782 HURT: 1666 Inst-and-stalls are helped. 
Reviewed-by: Alejandro Piñeiro Part-of: --- src/broadcom/compiler/qpu_schedule.c | 97 +++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 9 deletions(-) diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index 317b7306d88..cd0015a62d3 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -820,6 +820,50 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result, return true; } +static bool +can_do_add_as_mul(enum v3d_qpu_add_op op) +{ + switch (op) { + case V3D_QPU_A_ADD: + case V3D_QPU_A_SUB: + return true; + default: + return false; + } +} + +static enum v3d_qpu_mul_op +add_op_as_mul_op(enum v3d_qpu_add_op op) +{ + switch (op) { + case V3D_QPU_A_ADD: + return V3D_QPU_M_ADD; + case V3D_QPU_A_SUB: + return V3D_QPU_M_SUB; + default: + unreachable("unexpected add opcode"); + } +} + +static void +qpu_convert_add_to_mul(struct v3d_qpu_instr *inst) +{ + STATIC_ASSERT(sizeof(inst->alu.mul) == sizeof(inst->alu.add)); + assert(inst->alu.add.op != V3D_QPU_A_NOP); + assert(inst->alu.mul.op == V3D_QPU_M_NOP); + + memcpy(&inst->alu.mul, &inst->alu.add, sizeof(inst->alu.mul)); + inst->alu.mul.op = add_op_as_mul_op(inst->alu.add.op); + inst->alu.add.op = V3D_QPU_A_NOP; + + inst->flags.mc = inst->flags.ac; + inst->flags.mpf = inst->flags.apf; + inst->flags.muf = inst->flags.auf; + inst->flags.ac = V3D_QPU_PF_NONE; + inst->flags.apf = V3D_QPU_PF_NONE; + inst->flags.auf = V3D_QPU_PF_NONE; +} + static bool qpu_merge_inst(const struct v3d_device_info *devinfo, struct v3d_qpu_instr *result, @@ -837,17 +881,52 @@ qpu_merge_inst(const struct v3d_device_info *devinfo, struct v3d_qpu_instr merge = *a; const struct v3d_qpu_instr *add_instr = NULL, *mul_instr = NULL; + struct v3d_qpu_instr mul_inst; if (b->alu.add.op != V3D_QPU_A_NOP) { - if (a->alu.add.op != V3D_QPU_A_NOP) + if (a->alu.add.op == V3D_QPU_A_NOP) { + merge.alu.add = b->alu.add; + + merge.flags.ac = b->flags.ac; + merge.flags.apf = b->flags.apf; + 
merge.flags.auf = b->flags.auf; + + add_instr = b; + mul_instr = a; + } + /* If a's add op is used but its mul op is not, then see if we + * can convert either a's add op or b's add op to a mul op + * so we can merge. + */ + else if (a->alu.mul.op == V3D_QPU_M_NOP && + can_do_add_as_mul(b->alu.add.op)) { + mul_inst = *b; + qpu_convert_add_to_mul(&mul_inst); + + merge.alu.mul = mul_inst.alu.mul; + + merge.flags.mc = b->flags.ac; + merge.flags.mpf = b->flags.apf; + merge.flags.muf = b->flags.auf; + + add_instr = a; + mul_instr = &mul_inst; + } else if (a->alu.mul.op == V3D_QPU_M_NOP && + can_do_add_as_mul(a->alu.add.op)) { + mul_inst = *a; + qpu_convert_add_to_mul(&mul_inst); + + merge = mul_inst; + merge.alu.add = b->alu.add; + + merge.flags.ac = b->flags.ac; + merge.flags.apf = b->flags.apf; + merge.flags.auf = b->flags.auf; + + add_instr = b; + mul_instr = &mul_inst; + } else { return false; - merge.alu.add = b->alu.add; - - merge.flags.ac = b->flags.ac; - merge.flags.apf = b->flags.apf; - merge.flags.auf = b->flags.auf; - - add_instr = b; - mul_instr = a; + } } if (b->alu.mul.op != V3D_QPU_M_NOP) {