ir3: lower 64b registers before creating preamble

ir3_nir_lower_preamble cannot handle 64b @load/store_preamble so we have
to make sure ir3_nir_opt_preamble will never produce them. Up to now,
nir_lower_locals_to_regs was run after preamble lowering, so 64b locals
could still be around when lowering the preamble. This patch moves
nir_lower_locals_to_regs, as well as ir3_nir_lower_64b_regs, to run
before the preamble lowering.

Fixed Piglit tests:
- spec@arb_gpu_shader_fp64@execution@fs-indirect-temp-double-dst
- spec@arb_gpu_shader_fp64@execution@built-in-functions@fs-frexp-dvec4-variable-index

This patch has no impact on shader-db.

Note: a few cleanup passes used to be run after nir_lower_locals_to_regs
(nir_opt_algebraic, nir_opt_constant_folding) and after
ir3_nir_lower_64b_regs (nir_lower_alu_to_scalar, nir_copy_prop). As far
as I can tell, these are not necessary anymore when running the register
lowering earlier so this patch removes them.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26737>
This commit is contained in:
Job Noorman 2023-12-15 11:43:46 +01:00 committed by Marge Bot
parent 6cad2fc230
commit 2d273c520c
3 changed files with 10 additions and 30 deletions

View file

@@ -84,42 +84,14 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
ctx->s = nir_shader_clone(ctx, shader->nir);
ir3_nir_lower_variant(so, ctx->s);
/* this needs to be the last pass run, so do this here instead of
* in ir3_optimize_nir():
*/
bool progress = false;
bool needs_late_alg = false;
NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs, 1);
if (progress) {
bool regs_progress = false;
/* Split 64b registers into two 32b ones. */
NIR_PASS(regs_progress, ctx->s, ir3_nir_lower_64b_regs);
if (regs_progress) {
/* After splitting registers, we might still have some 64b vecs. Run
* some passes to get rid of them.
*/
NIR_PASS_V(ctx->s, nir_lower_alu_to_scalar, NULL, NULL);
NIR_PASS_V(ctx->s, nir_copy_prop);
}
}
/* we could need cleanup after lower_locals_to_regs */
while (progress) {
progress = false;
NIR_PASS(progress, ctx->s, nir_opt_algebraic);
NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
needs_late_alg = true;
}
/* We want to lower nir_op_imul as late as possible, to catch also
* those generated by earlier passes (e.g,
* nir_lower_locals_to_regs). However, we want a final swing of a
* few passes to have a chance at optimizing the result.
*/
progress = false;
NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
while (progress) {
progress = false;

View file

@@ -722,6 +722,13 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
/* Lower scratch writemasks */
progress |= OPT(s, nir_lower_wrmasks, should_split_wrmask, s);
if (OPT(s, nir_lower_locals_to_regs, 1)) {
progress = true;
/* Split 64b registers into two 32b ones. */
OPT_V(s, ir3_nir_lower_64b_regs);
}
progress |= OPT(s, ir3_nir_lower_wide_load_store);
progress |= OPT(s, ir3_nir_lower_64b_global);
progress |= OPT(s, ir3_nir_lower_64b_intrinsics);

View file

@@ -440,8 +440,9 @@ ir3_nir_lower_preamble(nir_shader *nir, struct ir3_shader_variant *v)
* ...
*/
b->cursor = nir_before_impl(main);
/* @decl_regs need to stay in the first block. */
b->cursor = nir_after_reg_decls(main);
nir_if *outer_if = nir_push_if(b, nir_preamble_start_ir3(b, 1));
{
nir_if *inner_if = nir_push_if(b, nir_elect(b, 1));