mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
broadcom/compiler: be more flexible scheduling TMU writes
V3D 4.x allows more flexibility, so take advantage of that. Generally, we can reorder any writes in the same sequence, so long as they are neither the sequence terminator (which must always be last, since it is the one triggering the operation) nor TMUD writes, since the latter must be ordered with respect to each other.

total instructions in shared programs: 13735183 -> 13731927 (-0.02%)
instructions in affected programs: 903057 -> 899801 (-0.36%)
helped: 2358
HURT: 746
Instructions are helped.

total max-temps in shared programs: 2322020 -> 2322009 (<.01%)
max-temps in affected programs: 619 -> 608 (-1.78%)
helped: 19
HURT: 11
Inconclusive result (value mean confidence interval includes 0).

total sfu-stalls in shared programs: 31494 -> 31489 (-0.02%)
sfu-stalls in affected programs: 182 -> 177 (-2.75%)
helped: 40
HURT: 40
Inconclusive result (value mean confidence interval includes 0).

total inst-and-stalls in shared programs: 13766677 -> 13763416 (-0.02%)
inst-and-stalls in affected programs: 901343 -> 898082 (-0.36%)
helped: 2349
HURT: 746
Inst-and-stalls are helped.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9555>
This commit is contained in:
parent
87ed614c47
commit
177dcd4b68
1 changed files with 36 additions and 15 deletions
|
|
@ -168,6 +168,36 @@ process_mux_deps(struct schedule_state *state, struct schedule_node *n,
|
|||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
tmu_write_is_sequence_terminator(uint32_t waddr)
|
||||
{
|
||||
switch (waddr) {
|
||||
case V3D_QPU_WADDR_TMUS:
|
||||
case V3D_QPU_WADDR_TMUSCM:
|
||||
case V3D_QPU_WADDR_TMUSF:
|
||||
case V3D_QPU_WADDR_TMUSLOD:
|
||||
case V3D_QPU_WADDR_TMUA:
|
||||
case V3D_QPU_WADDR_TMUAU:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
can_reorder_tmu_write(const struct v3d_device_info *devinfo, uint32_t waddr)
|
||||
{
|
||||
if (devinfo->ver < 40)
|
||||
return false;
|
||||
|
||||
if (tmu_write_is_sequence_terminator(waddr))
|
||||
return false;
|
||||
|
||||
if (waddr == V3D_QPU_WADDR_TMUD)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
|
||||
|
|
@ -176,22 +206,13 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
|
|||
if (!magic) {
|
||||
add_write_dep(state, &state->last_rf[waddr], n);
|
||||
} else if (v3d_qpu_magic_waddr_is_tmu(state->devinfo, waddr)) {
|
||||
/* XXX perf: For V3D 4.x, we could reorder TMU writes other
|
||||
* than the TMUS/TMUD/TMUA to improve scheduling flexibility.
|
||||
*/
|
||||
add_write_dep(state, &state->last_tmu_write, n);
|
||||
switch (waddr) {
|
||||
case V3D_QPU_WADDR_TMUS:
|
||||
case V3D_QPU_WADDR_TMUSCM:
|
||||
case V3D_QPU_WADDR_TMUSF:
|
||||
case V3D_QPU_WADDR_TMUSLOD:
|
||||
case V3D_QPU_WADDR_TMUA:
|
||||
case V3D_QPU_WADDR_TMUAU:
|
||||
if (can_reorder_tmu_write(state->devinfo, waddr))
|
||||
add_read_dep(state, state->last_tmu_write, n);
|
||||
else
|
||||
add_write_dep(state, &state->last_tmu_write, n);
|
||||
|
||||
if (tmu_write_is_sequence_terminator(waddr))
|
||||
add_write_dep(state, &state->last_tmu_config, n);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else if (v3d_qpu_magic_waddr_is_sfu(waddr)) {
|
||||
/* Handled by v3d_qpu_writes_r4() check. */
|
||||
} else {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue