diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 64fd8e3d7af..b654935b40c 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -818,6 +818,7 @@ agx_emit_load_scratch(agx_builder *b, agx_index dst, nir_intrinsic_instr *instr) agx_stack_load_to(b, dst, offset, format, mask); agx_emit_cached_split(b, dst, nr); + b->shader->any_scratch = true; } static void @@ -829,6 +830,7 @@ agx_emit_store_scratch(agx_builder *b, nir_intrinsic_instr *instr) unsigned mask = BITFIELD_MASK(nir_src_num_components(instr->src[0])); agx_stack_store(b, value, offset, format, mask); + b->shader->any_scratch = true; } /* @@ -2776,15 +2778,12 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl, emit_cf_list(ctx, &impl->body); agx_emit_phis_deferred(ctx); - if (impl->function->is_entrypoint && nir->scratch_size > 0) { - /* Apple always allocate 40 more bytes in the entrypoint and align to 4. */ - uint64_t stack_size = ALIGN(DIV_ROUND_UP(nir->scratch_size, 4) + 10, 4); - - assert(stack_size < INT16_MAX); - - agx_block *start_block = agx_start_block(ctx); - agx_builder _b = agx_init_builder(ctx, agx_before_block(start_block)); - agx_stack_adjust(&_b, stack_size); + /* Only allocate scratch if it's statically used, regardless of whether the NIR + * info claims otherwise. + */ + if (ctx->any_scratch) { + assert(!ctx->is_preamble && "preambles don't use scratch"); + ctx->scratch_size = ALIGN(nir->scratch_size, 16); } /* Stop the main shader or preamble shader after the exit block. For real @@ -2838,6 +2837,22 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl, agx_validate(ctx, "RA"); agx_lower_64bit_postra(ctx); + if (ctx->scratch_size > 0) { + /* Apple always allocates 40 more bytes in the entrypoint and aligns to 4.
*/ + uint64_t stack_size = ALIGN(DIV_ROUND_UP(ctx->scratch_size, 4) + 10, 4); + + assert(stack_size < INT16_MAX); + + agx_block *start_block = agx_start_block(ctx); + agx_builder _b = agx_init_builder(ctx, agx_before_block(start_block)); + agx_stack_adjust(&_b, stack_size); + + if (ctx->is_preamble) + out->preamble_scratch_size = stack_size; + else + out->scratch_size = stack_size; + } + if (ctx->stage == MESA_SHADER_VERTEX && !impl->function->is_preamble) agx_set_st_vary_final(ctx); @@ -3145,8 +3160,6 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key, nir_print_shader(nir, stdout); out->local_size = nir->info.shared_size; - if (nir->scratch_size > 0) - out->scratch_size = ALIGN(DIV_ROUND_UP(nir->scratch_size, 4) + 10, 4); nir_foreach_function_with_impl(func, impl, nir) { unsigned offset = diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h index c74b8a45911..8ced54419bc 100644 --- a/src/asahi/compiler/agx_compile.h +++ b/src/asahi/compiler/agx_compile.h @@ -114,8 +114,8 @@ struct agx_shader_info { /* Local memory allocation in bytes */ unsigned local_size; - /* Scratch memory allocation in bytes */ - unsigned scratch_size; + /* Scratch memory allocation in bytes for main/preamble respectively */ + unsigned scratch_size, preamble_scratch_size; /* Does the shader have a preamble? If so, it is at offset preamble_offset. * The main shader is at offset main_offset. The preamble is executed first. diff --git a/src/asahi/compiler/agx_compiler.h b/src/asahi/compiler/agx_compiler.h index 36fed76fab9..3075ba3f857 100644 --- a/src/asahi/compiler/agx_compiler.h +++ b/src/asahi/compiler/agx_compiler.h @@ -416,6 +416,7 @@ typedef struct { nir_shader *nir; gl_shader_stage stage; bool is_preamble; + unsigned scratch_size; struct list_head blocks; /* list of agx_block */ struct agx_shader_info *out; @@ -427,6 +428,9 @@ typedef struct { /* For creating temporaries */ unsigned alloc; + /* Does the shader statically use scratch memory? 
*/ + bool any_scratch; + /* I don't really understand how writeout ops work yet */ bool did_writeout;