ir3/legalize: make ir3_legalize_state and helpers public

Signed-off-by: Job Noorman <job@noorman.info>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34108>
This commit is contained in:
Job Noorman 2025-05-09 11:26:05 +02:00 committed by Marge Bot
parent 631a105148
commit ca014c7c24
2 changed files with 73 additions and 58 deletions

View file

@ -3317,4 +3317,69 @@ regmask_get(regmask_t *regmask, struct ir3_register *reg)
}
/* ************************************************************************* */
struct ir3_nop_state {
unsigned full_ready[GPR_REG_SIZE];
unsigned half_ready[GPR_REG_SIZE];
};
struct ir3_legalize_state {
regmask_t needs_ss;
regmask_t needs_ss_scalar_full; /* half scalar ALU producer -> full scalar ALU consumer */
regmask_t needs_ss_scalar_half; /* full scalar ALU producer -> half scalar ALU consumer */
regmask_t needs_ss_war; /* write after read */
regmask_t needs_sy_war; /* WAR that can only be resolved using (sy) */
regmask_t needs_ss_or_sy_war; /* WAR for sy-producer sources */
regmask_t needs_ss_scalar_war; /* scalar ALU write -> ALU write */
regmask_t needs_ss_or_sy_scalar_war;
regmask_t needs_sy;
bool needs_ss_for_const;
bool needs_sy_for_const;
/* Next instruction needs (ss)/(sy), no matter its dsts/srcs. */
bool force_ss;
bool force_sy;
/* Each of these arrays contains the cycle when the corresponding register
* becomes "ready" i.e. does not require any more nops. There is a special
* mechanism to let ALU instructions read compatible (i.e. same halfness)
* destinations of another ALU instruction with less delay, so this can
* depend on what type the consuming instruction is, which is why there are
* multiple arrays. The cycle is counted relative to the start of the block.
*/
/* When ALU instructions reading the given full/half register will be ready.
*/
struct ir3_nop_state alu_nop;
/* When non-ALU (e.g. cat5) instructions reading the given full/half register
* will be ready.
*/
struct ir3_nop_state non_alu_nop;
/* When p0.x-w, a0.x, and a1.x are ready. */
unsigned pred_ready[4];
unsigned addr_ready[2];
unsigned cycle;
};
typedef struct ir3_legalize_state *(*ir3_get_block_legalize_state_cb)(
struct ir3_block *);
void ir3_init_legalize_state(struct ir3_legalize_state *state,
struct ir3_compiler *compiler);
void ir3_merge_pred_legalize_states(struct ir3_legalize_state *state,
struct ir3_block *block,
ir3_get_block_legalize_state_cb get_state);
void ir3_update_legalize_state(struct ir3_legalize_state *state,
struct ir3_compiler *compiler,
struct ir3_instruction *n);
enum ir3_instruction_flags
ir3_required_sync_flags(struct ir3_legalize_state *state,
struct ir3_compiler *compiler,
struct ir3_instruction *n);
unsigned ir3_required_delay(struct ir3_legalize_state *state,
struct ir3_compiler *compiler,
struct ir3_instruction *instr);
#endif /* IR3_H_ */

View file

@ -37,52 +37,6 @@ struct ir3_legalize_ctx {
bool has_tex_prefetch;
};
struct ir3_nop_state {
unsigned full_ready[GPR_REG_SIZE];
unsigned half_ready[GPR_REG_SIZE];
};
struct ir3_legalize_state {
regmask_t needs_ss;
regmask_t needs_ss_scalar_full; /* half scalar ALU producer -> full scalar ALU consumer */
regmask_t needs_ss_scalar_half; /* full scalar ALU producer -> half scalar ALU consumer */
regmask_t needs_ss_war; /* write after read */
regmask_t needs_sy_war; /* WAR that can only be resolved using (sy) */
regmask_t needs_ss_or_sy_war; /* WAR for sy-producer sources */
regmask_t needs_ss_scalar_war; /* scalar ALU write -> ALU write */
regmask_t needs_ss_or_sy_scalar_war;
regmask_t needs_sy;
bool needs_ss_for_const;
bool needs_sy_for_const;
/* Next instruction needs (ss)/(sy), no matter its dsts/srcs. */
bool force_ss;
bool force_sy;
/* Each of these arrays contains the cycle when the corresponding register
* becomes "ready" i.e. does not require any more nops. There is a special
* mechanism to let ALU instructions read compatible (i.e. same halfness)
* destinations of another ALU instruction with less delay, so this can
* depend on what type the consuming instruction is, which is why there are
* multiple arrays. The cycle is counted relative to the start of the block.
*/
/* When ALU instructions reading the given full/half register will be ready.
*/
struct ir3_nop_state alu_nop;
/* When non-ALU (e.g. cat5) instructions reading the given full/half register
* will be ready.
*/
struct ir3_nop_state non_alu_nop;
/* When p0.x-w, a0.x, and a1.x are ready. */
unsigned pred_ready[4];
unsigned addr_ready[2];
unsigned cycle;
};
struct ir3_legalize_block_data {
bool valid;
struct ir3_legalize_state begin_state;
@ -118,7 +72,7 @@ needs_sy_war(struct ir3_legalize_state *state, struct ir3_register *dst)
return regmask_get(&state->needs_sy_war, dst);
}
static enum ir3_instruction_flags
enum ir3_instruction_flags
ir3_required_sync_flags(struct ir3_legalize_state *state,
struct ir3_compiler *compiler,
struct ir3_instruction *n)
@ -358,7 +312,7 @@ sync_update(struct ir3_legalize_state *state, struct ir3_compiler *compiler,
}
}
static void
void
ir3_init_legalize_state(struct ir3_legalize_state *state,
struct ir3_compiler *compiler)
{
@ -380,10 +334,7 @@ get_block_legalize_state(struct ir3_block *block)
return &bd->state;
}
typedef struct ir3_legalize_state *(*ir3_get_block_legalize_state_cb)(
struct ir3_block *);
static void
void
ir3_merge_pred_legalize_states(struct ir3_legalize_state *state,
struct ir3_block *block,
ir3_get_block_legalize_state_cb get_state)
@ -522,10 +473,9 @@ get_ready_slot(struct ir3_legalize_state *state,
}
}
static unsigned
delay_calc(struct ir3_compiler *compiler,
struct ir3_legalize_state *state,
struct ir3_instruction *instr)
unsigned
ir3_required_delay(struct ir3_legalize_state *state,
struct ir3_compiler *compiler, struct ir3_instruction *instr)
{
/* As far as we know, shader outputs don't need any delay. */
if (instr->opc == OPC_END || instr->opc == OPC_CHMASK)
@ -645,7 +595,7 @@ delay_update(struct ir3_compiler *compiler,
}
}
static void
void
ir3_update_legalize_state(struct ir3_legalize_state *state,
struct ir3_compiler *compiler,
struct ir3_instruction *n)
@ -771,7 +721,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
ir3_update_legalize_state(state, ctx->compiler, nop);
}
unsigned delay = delay_calc(ctx->compiler, state, n);
unsigned delay = ir3_required_delay(state, ctx->compiler, n);
/* NOTE: I think the nopN encoding works for a5xx and
* probably a4xx, but not a3xx. So far only tested on