From 7fd5f76393da2e6cd8d71eef8a382769e2fac3a2 Mon Sep 17 00:00:00 2001
From: Zan Dobersek
Date: Sun, 14 Jul 2024 08:59:27 +0200
Subject: [PATCH] nir/lower_vars_to_scratch: calculate threshold-limited
 variable size separately

ir3's lowering of variables to scratch memory has to treat 8-bit values
as 16-bit ones when comparing a variable's size against the given
threshold, since those values are handled through 16-bit half-registers.
But the same values can still use their natural 8-bit size and alignment
when stored in scratch memory.

nir_lower_vars_to_scratch now accepts two size-and-alignment functions:
one for calculating the variable size that is compared against the
threshold, and one for calculating the size and alignment used for the
scratch memory layout. Non-ir3 users of the pass simply pass the
currently-used function twice. ir3 provides a separate variable-size
function that special-cases 8-bit types.

Signed-off-by: Zan Dobersek
Part-of:
---
 src/amd/common/ac_nir.c                  |  2 +-
 src/asahi/compiler/agx_compile.c         |  2 +-
 src/broadcom/compiler/vir.c              |  1 +
 src/compiler/glsl_types.c                |  2 +-
 src/compiler/glsl_types.h                |  3 +++
 src/compiler/nir/nir.h                   |  3 ++-
 src/compiler/nir/nir_lower_scratch.c     |  9 ++++----
 src/freedreno/ir3/ir3_nir.c              | 28 +++++++++++++++++++++++-
 src/gallium/drivers/r600/sfn/sfn_nir.cpp |  1 +
 src/panfrost/compiler/bifrost_compile.c  |  8 +++----
 10 files changed, 46 insertions(+), 13 deletions(-)

diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c
index edfc9c16c58..b77e8b5a4a0 100644
--- a/src/amd/common/ac_nir.c
+++ b/src/amd/common/ac_nir.c
@@ -648,7 +648,7 @@ ac_nir_lower_indirect_derefs(nir_shader *shader,
     * scratch to alloca's, assuming LLVM won't generate VGPR indexing.
     */
    NIR_PASS(progress, shader, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
-            glsl_get_natural_size_align_bytes);
+            glsl_get_natural_size_align_bytes, glsl_get_natural_size_align_bytes);
 
    /* LLVM doesn't support VGPR indexing on GFX9. */
    bool llvm_has_working_vgpr_indexing = gfx_level != GFX9;
diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index 89c9c0a831a..5350aae7cb2 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -3293,7 +3293,7 @@ agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx)
 
    /* Lower large arrays to scratch and small arrays to csel */
    NIR_PASS(_, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 16,
-            glsl_get_natural_size_align_bytes);
+            glsl_get_natural_size_align_bytes, glsl_get_natural_size_align_bytes);
    NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_function_temp, ~0);
    NIR_PASS(_, nir, nir_split_var_copies);
    NIR_PASS(_, nir, nir_lower_global_vars_to_local);
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index a087282cfc8..dd638c3e64c 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -1752,6 +1752,7 @@ v3d_attempt_compile(struct v3d_compile *c)
         NIR_PASS(_, c->s, nir_lower_vars_to_scratch,
                  nir_var_function_temp,
                  0,
+                 glsl_get_natural_size_align_bytes,
                  glsl_get_natural_size_align_bytes);
 
         NIR_PASS(_, c->s, v3d_nir_lower_global_2x32);
diff --git a/src/compiler/glsl_types.c b/src/compiler/glsl_types.c
index 7a31de2a312..808a3cb40f2 100644
--- a/src/compiler/glsl_types.c
+++ b/src/compiler/glsl_types.c
@@ -3685,7 +3685,7 @@ glsl_channel_type(const glsl_type *t)
    }
 }
 
-static void
+void
 glsl_size_align_handle_array_and_structs(const glsl_type *type,
                                          glsl_type_size_align_func size_align,
                                          unsigned *size, unsigned *align)
diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h
index fefba4115f3..ce64318eb58 100644
--- a/src/compiler/glsl_types.h
+++ b/src/compiler/glsl_types.h
@@ -1350,6 +1350,9 @@ glsl_get_explicit_interface_type(const glsl_type *t, bool supports_std430)
    }
 }
 
+void glsl_size_align_handle_array_and_structs(const glsl_type *type,
+                                              glsl_type_size_align_func size_align,
+                                              unsigned *size, unsigned *align);
 void glsl_get_natural_size_align_bytes(const glsl_type *t, unsigned *size, unsigned *align);
 
 void glsl_get_vec4_size_align_bytes(const glsl_type *type, unsigned *size, unsigned *align);
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index d3024d0343e..c423398c167 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -5434,7 +5434,8 @@ bool nir_lower_io_to_temporaries(nir_shader *shader,
 bool nir_lower_vars_to_scratch(nir_shader *shader,
                                nir_variable_mode modes,
                                int size_threshold,
-                               glsl_type_size_align_func size_align);
+                               glsl_type_size_align_func variable_size_align,
+                               glsl_type_size_align_func scratch_layout_size_align);
 
 void nir_lower_clip_halfz(nir_shader *shader);
diff --git a/src/compiler/nir/nir_lower_scratch.c b/src/compiler/nir/nir_lower_scratch.c
index 50eecfff6e5..3536e05d9c4 100644
--- a/src/compiler/nir/nir_lower_scratch.c
+++ b/src/compiler/nir/nir_lower_scratch.c
@@ -95,7 +95,8 @@ bool
 nir_lower_vars_to_scratch(nir_shader *shader,
                           nir_variable_mode modes,
                           int size_threshold,
-                          glsl_type_size_align_func size_align)
+                          glsl_type_size_align_func variable_size_align,
+                          glsl_type_size_align_func scratch_layout_size_align)
 {
    struct set *set = _mesa_pointer_set_create(NULL);
 
@@ -131,7 +132,7 @@ nir_lower_vars_to_scratch(nir_shader *shader,
          continue;
 
       unsigned var_size, var_align;
-      size_align(var->type, &var_size, &var_align);
+      variable_size_align(var->type, &var_size, &var_align);
       if (var_size <= size_threshold)
          continue;
 
@@ -207,13 +208,13 @@ nir_lower_vars_to_scratch(nir_shader *shader,
          if (var->data.location == INT_MAX) {
             unsigned var_size, var_align;
-            size_align(var->type, &var_size, &var_align);
+            scratch_layout_size_align(var->type, &var_size, &var_align);
 
             var->data.location = ALIGN_POT(shader->scratch_size, var_align);
             shader->scratch_size = var->data.location + var_size;
          }
 
-         lower_load_store(&build, intrin, size_align);
+         lower_load_store(&build, intrin, scratch_layout_size_align);
          impl_progress = true;
       }
    }
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 3db75b6ab9a..bd9eec6e8d1 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -181,6 +181,31 @@ ir3_lower_bit_size(const nir_instr *instr, UNUSED void *data)
    return 0;
 }
 
+static void
+ir3_get_variable_size_align_bytes(const glsl_type *type, unsigned *size, unsigned *align)
+{
+   switch (type->base_type) {
+   case GLSL_TYPE_ARRAY:
+   case GLSL_TYPE_INTERFACE:
+   case GLSL_TYPE_STRUCT:
+      glsl_size_align_handle_array_and_structs(type, ir3_get_variable_size_align_bytes,
+                                               size, align);
+      break;
+   case GLSL_TYPE_UINT8:
+   case GLSL_TYPE_INT8:
+      /* 8-bit values are handled through 16-bit half-registers, so the resulting size
+       * and alignment value has to be doubled to reflect the actual variable size
+       * requirement.
+       */
+      *size = 2 * glsl_get_components(type);
+      *align = 2;
+      break;
+   default:
+      glsl_get_natural_size_align_bytes(type, size, align);
+      break;
+   }
+}
+
 #define OPT(nir, pass, ...)                                                    \
    ({                                                                          \
       bool this_progress = false;                                              \
@@ -828,7 +853,8 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
     */
    if (so->compiler->has_pvtmem) {
       progress |= OPT(s, nir_lower_vars_to_scratch, nir_var_function_temp,
-                      16 * 16 /* bytes */, glsl_get_natural_size_align_bytes);
+                      16 * 16 /* bytes */,
+                      ir3_get_variable_size_align_bytes, glsl_get_natural_size_align_bytes);
    }
 
    /* Lower scratch writemasks */
diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
index eff49b287e4..3d8632d7fb3 100644
--- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp
@@ -847,6 +847,7 @@ r600_lower_and_optimize_nir(nir_shader *sh,
               nir_lower_vars_to_scratch,
               nir_var_function_temp,
               40,
+              r600_get_natural_size_align_bytes,
               r600_get_natural_size_align_bytes);
 
    while (optimize_once(sh))
diff --git a/src/panfrost/compiler/bifrost_compile.c b/src/panfrost/compiler/bifrost_compile.c
index 10088656e6f..4ceff8d5be9 100644
--- a/src/panfrost/compiler/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost_compile.c
@@ -4937,12 +4937,12 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
     * (currently unconditional for Valhall), we force vec4 alignment for
     * scratch access.
     */
-   bool packed_tls = (gpu_id >= 0x9000);
-
+   glsl_type_size_align_func vars_to_scratch_size_align_func =
+      (gpu_id >= 0x9000) ? glsl_get_vec4_size_align_bytes
+                         : glsl_get_natural_size_align_bytes;
    /* Lower large arrays to scratch and small arrays to bcsel */
    NIR_PASS_V(nir, nir_lower_vars_to_scratch, nir_var_function_temp, 256,
-              packed_tls ? glsl_get_vec4_size_align_bytes
-                         : glsl_get_natural_size_align_bytes);
+              vars_to_scratch_size_align_func, vars_to_scratch_size_align_func);
    NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_function_temp, ~0);
    NIR_PASS_V(nir, nir_split_var_copies);
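---

A standalone sketch (not part of the patch) of the split the commit message
describes: the threshold check doubles 8-bit components because ir3 places
them in 16-bit half-registers, while the scratch layout keeps natural byte
sizes. The fake_type struct and helper names below are illustrative
stand-ins, not Mesa's glsl_type API.

/* Illustrative only: models the two size callbacks with a simplified type. */
#include <stdio.h>

struct fake_type {
   unsigned bit_size;   /* 8, 16 or 32 */
   unsigned components; /* number of scalar components */
};

/* Natural layout: 8-bit values occupy one byte each (scratch layout callback). */
static void natural_size_align(const struct fake_type *t, unsigned *size, unsigned *align)
{
   unsigned bytes = t->bit_size / 8;
   *size = bytes * t->components;
   *align = bytes;
}

/* ir3-style variable size: 8-bit values live in 16-bit half-registers, so they
 * count double against the spill threshold (threshold callback). */
static void ir3_style_variable_size_align(const struct fake_type *t, unsigned *size,
                                          unsigned *align)
{
   if (t->bit_size == 8) {
      *size = 2 * t->components;
      *align = 2;
      return;
   }
   natural_size_align(t, size, align);
}

int main(void)
{
   const struct fake_type u8_array = { 8, 160 };  /* e.g. a uint8_t[160] temp */
   const unsigned threshold = 16 * 16;            /* bytes, as used by ir3 */

   unsigned reg_size, reg_align, mem_size, mem_align;
   ir3_style_variable_size_align(&u8_array, &reg_size, &reg_align);
   natural_size_align(&u8_array, &mem_size, &mem_align);

   /* 320 > 256, so the variable would be moved to scratch... */
   printf("half-register footprint: %u bytes -> %s threshold\n",
          reg_size, reg_size > threshold ? "exceeds" : "within");
   /* ...but it only needs 160 bytes of scratch, aligned to 1 byte. */
   printf("scratch layout: %u bytes, align %u\n", mem_size, mem_align);
   return 0;
}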