mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 02:48:06 +02:00
ir3/legalize: track need_ss/sy_for_const per const reg
Instead of tracking if *any* const reg has been written since the last sync, use a bitset to track exactly which const regs have been written. This often helps us prevent stalls.

Preamble stats:

Totals from 32893 (18.66% of 176258) affected shaders:
Instrs: 3540796 -> 3540370 (-0.01%); split: -0.08%, +0.07%
CodeSize: 30635588 -> 30627370 (-0.03%); split: -0.09%, +0.07%
NOPs: 491600 -> 491174 (-0.09%); split: -0.58%, +0.49%
(ss): 465746 -> 450057 (-3.37%); split: -3.54%, +0.17%
(sy): 89251 -> 85497 (-4.21%); split: -4.30%, +0.09%
(ss)-stall: 1210233 -> 1164381 (-3.79%); split: -4.44%, +0.66%
(sy)-stall: 1286176 -> 1283034 (-0.24%); split: -0.94%, +0.70%
Cat0: 594508 -> 594082 (-0.07%); split: -0.48%, +0.41%

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40954>
This commit is contained in:
parent
c1bf9d6fd5
commit
4808037f6e
2 changed files with 39 additions and 14 deletions
|
|
@ -1544,6 +1544,7 @@ writes_pred(struct ir3_instruction *instr)
|
|||
#define SHARED_REG_SIZE (4 * 8)
|
||||
#define NONGPR_REG_START (SHARED_REG_START + SHARED_REG_SIZE)
|
||||
#define NONGPR_REG_SIZE (4 * 8)
|
||||
#define CONST_REG_SIZE (4 * 512)
|
||||
|
||||
enum ir3_reg_file {
|
||||
IR3_FILE_FULL,
|
||||
|
|
@ -3347,6 +3348,8 @@ struct ir3_nop_state {
|
|||
unsigned half_ready[GPR_REG_SIZE];
|
||||
};
|
||||
|
||||
typedef BITSET_DECLARE(conststate_t, CONST_REG_SIZE);
|
||||
|
||||
struct ir3_legalize_state {
|
||||
regmask_t needs_ss;
|
||||
regmask_t needs_ss_scalar_full; /* half scalar ALU producer -> full scalar ALU consumer */
|
||||
|
|
@ -3357,8 +3360,8 @@ struct ir3_legalize_state {
|
|||
regmask_t needs_ss_scalar_war; /* scalar ALU write -> ALU write */
|
||||
regmask_t needs_ss_or_sy_scalar_war;
|
||||
regmask_t needs_sy;
|
||||
bool needs_ss_for_const;
|
||||
bool needs_sy_for_const;
|
||||
conststate_t needs_ss_for_const;
|
||||
conststate_t needs_sy_for_const;
|
||||
|
||||
/* Next instruction needs (ss)/(sy), no matter its dsts/srcs. */
|
||||
bool force_ss;
|
||||
|
|
|
|||
|
|
@ -149,11 +149,23 @@ ir3_required_sync_flags(struct ir3_legalize_state *state,
|
|||
flags |= IR3_INSTR_SY;
|
||||
}
|
||||
} else if ((reg->flags & IR3_REG_CONST)) {
|
||||
if (state->needs_ss_for_const) {
|
||||
flags |= IR3_INSTR_SS;
|
||||
}
|
||||
if (state->needs_sy_for_const) {
|
||||
flags |= IR3_INSTR_SY;
|
||||
if (reg->flags & IR3_REG_RELATIV) {
|
||||
/* Since we don't know which const reg is accessed, add sync flags
|
||||
* if any const reg need them.
|
||||
*/
|
||||
if (!BITSET_IS_EMPTY(state->needs_ss_for_const)) {
|
||||
flags |= IR3_INSTR_SS;
|
||||
}
|
||||
if (!BITSET_IS_EMPTY(state->needs_sy_for_const)) {
|
||||
flags |= IR3_INSTR_SY;
|
||||
}
|
||||
} else {
|
||||
if (BITSET_TEST(state->needs_ss_for_const, reg->num)) {
|
||||
flags |= IR3_INSTR_SS;
|
||||
}
|
||||
if (BITSET_TEST(state->needs_sy_for_const, reg->num)) {
|
||||
flags |= IR3_INSTR_SY;
|
||||
}
|
||||
}
|
||||
} else if (!(reg->flags & (IR3_REG_IMMED | IR3_REG_RT))) {
|
||||
if (regmask_get(&state->needs_ss, reg)) {
|
||||
|
|
@ -186,7 +198,7 @@ apply_ss(struct ir3_legalize_state *state, bool mergedregs)
|
|||
regmask_init(&state->needs_ss_or_sy_scalar_war, mergedregs);
|
||||
regmask_init(&state->needs_ss_scalar_full, mergedregs);
|
||||
regmask_init(&state->needs_ss_scalar_half, mergedregs);
|
||||
state->needs_ss_for_const = false;
|
||||
BITSET_ZERO(state->needs_ss_for_const);
|
||||
state->force_ss = false;
|
||||
}
|
||||
|
||||
|
|
@ -197,7 +209,7 @@ apply_sy(struct ir3_legalize_state *state, bool mergedregs)
|
|||
regmask_init(&state->needs_sy_war, mergedregs);
|
||||
regmask_init(&state->needs_ss_or_sy_war, mergedregs);
|
||||
regmask_init(&state->needs_ss_or_sy_scalar_war, mergedregs);
|
||||
state->needs_sy_for_const = false;
|
||||
BITSET_ZERO(state->needs_sy_for_const);
|
||||
state->force_sy = false;
|
||||
}
|
||||
|
||||
|
|
@ -258,10 +270,18 @@ sync_update(struct ir3_legalize_state *state, struct ir3_compiler *compiler,
|
|||
} else {
|
||||
regmask_set(&state->needs_ss, n->dsts[0]);
|
||||
}
|
||||
} else if (n->opc == OPC_PUSH_CONSTS_LOAD_MACRO || n->opc == OPC_STC) {
|
||||
state->needs_ss_for_const = true;
|
||||
} else if (n->opc == OPC_PUSH_CONSTS_LOAD_MACRO) {
|
||||
unsigned const_dst = n->push_consts.dst_base;
|
||||
unsigned const_size = n->push_consts.src_size * 2;
|
||||
BITSET_SET_COUNT(state->needs_ss_for_const, const_dst, const_size);
|
||||
} else if (n->opc == OPC_STC) {
|
||||
unsigned const_dst = n->cat6.dst_offset;
|
||||
unsigned const_size = n->cat6.iim_val;
|
||||
BITSET_SET_COUNT(state->needs_ss_for_const, const_dst, const_size);
|
||||
} else if (n->opc == OPC_LDC_K) {
|
||||
state->needs_sy_for_const = true;
|
||||
unsigned const_dst = n->cat6.dst_offset;
|
||||
unsigned const_size = n->cat6.iim_val * 4;
|
||||
BITSET_SET_COUNT(state->needs_sy_for_const, const_dst, const_size);
|
||||
}
|
||||
|
||||
/* both tex/sfu appear to not always immediately consume
|
||||
|
|
@ -370,8 +390,10 @@ ir3_merge_pred_legalize_states(struct ir3_legalize_state *state,
|
|||
regmask_or(&state->needs_ss_or_sy_war, &state->needs_ss_or_sy_war,
|
||||
&pstate->needs_ss_or_sy_war);
|
||||
regmask_or(&state->needs_sy, &state->needs_sy, &pstate->needs_sy);
|
||||
state->needs_ss_for_const |= pstate->needs_ss_for_const;
|
||||
state->needs_sy_for_const |= pstate->needs_sy_for_const;
|
||||
BITSET_OR(state->needs_ss_for_const, state->needs_ss_for_const,
|
||||
pstate->needs_ss_for_const);
|
||||
BITSET_OR(state->needs_sy_for_const, state->needs_sy_for_const,
|
||||
pstate->needs_sy_for_const);
|
||||
state->force_ss |= pstate->force_ss;
|
||||
state->force_sy |= pstate->force_sy;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue