diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index bddd2c352ce..fafa51f1842 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1544,6 +1544,7 @@ writes_pred(struct ir3_instruction *instr) #define SHARED_REG_SIZE (4 * 8) #define NONGPR_REG_START (SHARED_REG_START + SHARED_REG_SIZE) #define NONGPR_REG_SIZE (4 * 8) +#define CONST_REG_SIZE (4 * 512) enum ir3_reg_file { IR3_FILE_FULL, @@ -3347,6 +3348,8 @@ struct ir3_nop_state { unsigned half_ready[GPR_REG_SIZE]; }; +typedef BITSET_DECLARE(conststate_t, CONST_REG_SIZE); + struct ir3_legalize_state { regmask_t needs_ss; regmask_t needs_ss_scalar_full; /* half scalar ALU producer -> full scalar ALU consumer */ @@ -3357,8 +3360,8 @@ struct ir3_legalize_state { regmask_t needs_ss_scalar_war; /* scalar ALU write -> ALU write */ regmask_t needs_ss_or_sy_scalar_war; regmask_t needs_sy; - bool needs_ss_for_const; - bool needs_sy_for_const; + conststate_t needs_ss_for_const; + conststate_t needs_sy_for_const; /* Next instruction needs (ss)/(sy), no matter its dsts/srcs. */ bool force_ss; diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 9748367c572..635af257b1a 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -149,11 +149,23 @@ ir3_required_sync_flags(struct ir3_legalize_state *state, flags |= IR3_INSTR_SY; } } else if ((reg->flags & IR3_REG_CONST)) { - if (state->needs_ss_for_const) { - flags |= IR3_INSTR_SS; - } - if (state->needs_sy_for_const) { - flags |= IR3_INSTR_SY; + if (reg->flags & IR3_REG_RELATIV) { + /* Since we don't know which const reg is accessed, add sync flags + * if any const reg need them. + */ + if (!BITSET_IS_EMPTY(state->needs_ss_for_const)) { + flags |= IR3_INSTR_SS; + } + if (!BITSET_IS_EMPTY(state->needs_sy_for_const)) { + flags |= IR3_INSTR_SY; + } + } else { + if (BITSET_TEST(state->needs_ss_for_const, reg->num)) { + flags |= IR3_INSTR_SS; + } + if (BITSET_TEST(state->needs_sy_for_const, reg->num)) { + flags |= IR3_INSTR_SY; + } } } else if (!(reg->flags & (IR3_REG_IMMED | IR3_REG_RT))) { if (regmask_get(&state->needs_ss, reg)) { @@ -186,7 +198,7 @@ apply_ss(struct ir3_legalize_state *state, bool mergedregs) regmask_init(&state->needs_ss_or_sy_scalar_war, mergedregs); regmask_init(&state->needs_ss_scalar_full, mergedregs); regmask_init(&state->needs_ss_scalar_half, mergedregs); - state->needs_ss_for_const = false; + BITSET_ZERO(state->needs_ss_for_const); state->force_ss = false; } @@ -197,7 +209,7 @@ apply_sy(struct ir3_legalize_state *state, bool mergedregs) regmask_init(&state->needs_sy_war, mergedregs); regmask_init(&state->needs_ss_or_sy_war, mergedregs); regmask_init(&state->needs_ss_or_sy_scalar_war, mergedregs); - state->needs_sy_for_const = false; + BITSET_ZERO(state->needs_sy_for_const); state->force_sy = false; } @@ -258,10 +270,18 @@ sync_update(struct ir3_legalize_state *state, struct ir3_compiler *compiler, } else { regmask_set(&state->needs_ss, n->dsts[0]); } - } else if (n->opc == OPC_PUSH_CONSTS_LOAD_MACRO || n->opc == OPC_STC) { - state->needs_ss_for_const = true; + } else if (n->opc == OPC_PUSH_CONSTS_LOAD_MACRO) { + unsigned const_dst = n->push_consts.dst_base; + unsigned const_size = n->push_consts.src_size * 2; + BITSET_SET_COUNT(state->needs_ss_for_const, const_dst, const_size); + } else if (n->opc == OPC_STC) { + unsigned const_dst = n->cat6.dst_offset; + unsigned const_size = n->cat6.iim_val; + BITSET_SET_COUNT(state->needs_ss_for_const, const_dst, const_size); } else if (n->opc == OPC_LDC_K) { - state->needs_sy_for_const = true; + unsigned const_dst = n->cat6.dst_offset; + unsigned const_size = n->cat6.iim_val * 4; + BITSET_SET_COUNT(state->needs_sy_for_const, const_dst, const_size); } /* both tex/sfu appear to not always immediately consume @@ -370,8 +390,10 @@ ir3_merge_pred_legalize_states(struct ir3_legalize_state *state, regmask_or(&state->needs_ss_or_sy_war, &state->needs_ss_or_sy_war, &pstate->needs_ss_or_sy_war); regmask_or(&state->needs_sy, &state->needs_sy, &pstate->needs_sy); - state->needs_ss_for_const |= pstate->needs_ss_for_const; - state->needs_sy_for_const |= pstate->needs_sy_for_const; + BITSET_OR(state->needs_ss_for_const, state->needs_ss_for_const, + pstate->needs_ss_for_const); + BITSET_OR(state->needs_sy_for_const, state->needs_sy_for_const, + pstate->needs_sy_for_const); state->force_ss |= pstate->force_ss; state->force_sy |= pstate->force_sy;