mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-28 14:40:10 +01:00
vc4: Pair up QPU instructions when scheduling.
We've got two mostly-independent operations in each QPU instruction, so try to pack two operations together. This is fairly naive (it doesn't track reads and writes separately in instructions, doesn't convert ADD-based MOVs into MUL-based MOVs, and doesn't reorder across uniform loads), but it does show a decent improvement on shader-db-2. Total instructions in shared programs: 59583 -> 57651 (-3.24%). Instructions in affected programs: 47361 -> 45429 (-4.08%).
This commit is contained in:
parent
7b0067d23a
commit
29c7cf2b2b
3 changed files with 105 additions and 38 deletions
|
|
@ -192,36 +192,58 @@ qpu_m_alu2(enum qpu_op_mul op,
|
|||
return inst;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
merge_fields(uint64_t merge,
|
||||
uint64_t add, uint64_t mul,
|
||||
static bool
|
||||
merge_fields(uint64_t *merge,
|
||||
uint64_t a, uint64_t b,
|
||||
uint64_t mask, uint64_t ignore)
|
||||
{
|
||||
if ((add & mask) == ignore)
|
||||
return (merge & ~mask) | (mul & mask);
|
||||
else if ((mul & mask) == ignore)
|
||||
return (merge & ~mask) | (add & mask);
|
||||
else {
|
||||
assert((add & mask) == (mul & mask));
|
||||
return merge;
|
||||
if ((a & mask) == ignore) {
|
||||
*merge = (*merge & ~mask) | (b & mask);
|
||||
} else if ((b & mask) == ignore) {
|
||||
*merge = (*merge & ~mask) | (a & mask);
|
||||
} else {
|
||||
if ((a & mask) != (b & mask))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
qpu_inst(uint64_t add, uint64_t mul)
|
||||
qpu_merge_inst(uint64_t a, uint64_t b)
|
||||
{
|
||||
uint64_t merge = ((add & ~QPU_WADDR_MUL_MASK) |
|
||||
(mul & ~QPU_WADDR_ADD_MASK));
|
||||
uint64_t merge = a | b;
|
||||
bool ok = true;
|
||||
|
||||
merge = merge_fields(merge, add, mul, QPU_SIG_MASK,
|
||||
QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
|
||||
if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP &&
|
||||
QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP)
|
||||
return 0;
|
||||
|
||||
merge = merge_fields(merge, add, mul, QPU_RADDR_A_MASK,
|
||||
QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A));
|
||||
merge = merge_fields(merge, add, mul, QPU_RADDR_B_MASK,
|
||||
QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));
|
||||
if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP &&
|
||||
QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
|
||||
return 0;
|
||||
|
||||
return merge;
|
||||
ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK,
|
||||
QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
|
||||
|
||||
/* Misc fields that have to match exactly. */
|
||||
ok = ok && merge_fields(&merge, a, b, QPU_SF | QPU_WS | QPU_PM,
|
||||
~0);
|
||||
|
||||
ok = ok && merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
|
||||
QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A));
|
||||
ok = ok && merge_fields(&merge, a, b, QPU_RADDR_B_MASK,
|
||||
QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));
|
||||
|
||||
ok = ok && merge_fields(&merge, a, b, QPU_WADDR_ADD_MASK,
|
||||
QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD));
|
||||
ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK,
|
||||
QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL));
|
||||
|
||||
if (ok)
|
||||
return merge;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ uint64_t qpu_a_alu2(enum qpu_op_add op, struct qpu_reg dst,
|
|||
struct qpu_reg src0, struct qpu_reg src1);
|
||||
uint64_t qpu_m_alu2(enum qpu_op_mul op, struct qpu_reg dst,
|
||||
struct qpu_reg src0, struct qpu_reg src1);
|
||||
uint64_t qpu_inst(uint64_t add, uint64_t mul);
|
||||
uint64_t qpu_merge_inst(uint64_t a, uint64_t b);
|
||||
uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val);
|
||||
uint64_t qpu_set_sig(uint64_t inst, uint32_t sig);
|
||||
uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond);
|
||||
|
|
|
|||
|
|
@ -465,7 +465,8 @@ get_instruction_priority(uint64_t inst)
|
|||
|
||||
static struct schedule_node *
|
||||
choose_instruction_to_schedule(struct choose_scoreboard *scoreboard,
|
||||
struct simple_node *schedule_list)
|
||||
struct simple_node *schedule_list,
|
||||
uint64_t prev_inst)
|
||||
{
|
||||
struct schedule_node *chosen = NULL;
|
||||
struct simple_node *node;
|
||||
|
|
@ -490,6 +491,15 @@ choose_instruction_to_schedule(struct choose_scoreboard *scoreboard,
|
|||
if (pixel_scoreboard_too_soon(scoreboard, inst))
|
||||
continue;
|
||||
|
||||
/* If we're trying to pair with another instruction, check
|
||||
* that they're compatible.
|
||||
*/
|
||||
if (prev_inst != 0) {
|
||||
inst = qpu_merge_inst(prev_inst, inst);
|
||||
if (!inst)
|
||||
continue;
|
||||
}
|
||||
|
||||
int prio = get_instruction_priority(inst);
|
||||
|
||||
/* Found a valid instruction. If nothing better comes along,
|
||||
|
|
@ -570,6 +580,23 @@ compute_delay(struct schedule_node *n)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
mark_instruction_scheduled(struct simple_node *schedule_list,
|
||||
struct schedule_node *node)
|
||||
{
|
||||
if (!node)
|
||||
return;
|
||||
|
||||
for (int i = node->child_count - 1; i >= 0; i--) {
|
||||
struct schedule_node *child =
|
||||
node->children[i];
|
||||
|
||||
child->parent_count--;
|
||||
if (child->parent_count == 0)
|
||||
insert_at_head(schedule_list, &child->link);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list)
|
||||
{
|
||||
|
|
@ -598,7 +625,9 @@ schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list)
|
|||
while (!is_empty_list(schedule_list)) {
|
||||
struct schedule_node *chosen =
|
||||
choose_instruction_to_schedule(&scoreboard,
|
||||
schedule_list);
|
||||
schedule_list,
|
||||
0);
|
||||
struct schedule_node *merge = NULL;
|
||||
|
||||
/* If there are no valid instructions to schedule, drop a NOP
|
||||
* in.
|
||||
|
|
@ -610,12 +639,38 @@ schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list)
|
|||
dump_state(schedule_list);
|
||||
fprintf(stderr, "chose: ");
|
||||
vc4_qpu_disasm(&inst, 1);
|
||||
fprintf(stderr, "\n\n");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
/* Schedule this instruction onto the QPU list. */
|
||||
if (chosen)
|
||||
/* Schedule this instruction onto the QPU list. Also try to
|
||||
* find an instruction to pair with it.
|
||||
*/
|
||||
if (chosen) {
|
||||
remove_from_list(&chosen->link);
|
||||
|
||||
merge = choose_instruction_to_schedule(&scoreboard,
|
||||
schedule_list,
|
||||
inst);
|
||||
if (merge) {
|
||||
remove_from_list(&merge->link);
|
||||
inst = qpu_merge_inst(inst, merge->inst->inst);
|
||||
assert(inst != 0);
|
||||
|
||||
if (debug) {
|
||||
fprintf(stderr, "merging: ");
|
||||
vc4_qpu_disasm(&merge->inst->inst, 1);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "resulting in: ");
|
||||
vc4_qpu_disasm(&inst, 1);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (debug) {
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
qpu_serialize_one_inst(c, inst);
|
||||
|
||||
update_scoreboard_for_chosen(&scoreboard, inst);
|
||||
|
|
@ -625,18 +680,8 @@ schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list)
|
|||
* be scheduled. Update the children's unblocked time for this
|
||||
* DAG edge as we do so.
|
||||
*/
|
||||
if (chosen) {
|
||||
for (int i = chosen->child_count - 1; i >= 0; i--) {
|
||||
struct schedule_node *child =
|
||||
chosen->children[i];
|
||||
|
||||
child->parent_count--;
|
||||
if (child->parent_count == 0) {
|
||||
insert_at_head(schedule_list,
|
||||
&child->link);
|
||||
}
|
||||
}
|
||||
}
|
||||
mark_instruction_scheduled(schedule_list, chosen);
|
||||
mark_instruction_scheduled(schedule_list, merge);
|
||||
|
||||
scoreboard.tick++;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue