From 127f67a66a68c6389eb024ea09ea701c5ed452fd Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Wed, 4 Sep 2024 18:32:13 +0200 Subject: [PATCH] ir3: Respect alignment of required consts when allocating ubo,preamble Optional const allocations (preamble, UBO, etc.) may shift the required consts (e.g. driver params) more than they expect. The free space for optional allocations should respect the alignment of required consts that come after them. Example: there are at most 100 vec4 consts, driver params take 4 units, and something else takes 1 unit. The preamble then thinks that there are 95 free units; however, driver params cannot start at offset=95 because they have a higher alignment requirement. Fixes some d3d12 games. Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/ir3/ir3_nir.c | 25 ++++++++++++++++++- src/freedreno/ir3/ir3_nir.h | 2 ++ .../ir3/ir3_nir_analyze_ubo_ranges.c | 5 ++-- src/freedreno/ir3/ir3_nir_opt_preamble.c | 2 +- src/freedreno/ir3/ir3_shader.h | 6 +++++ 5 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index cbc4704cedb..051033190d7 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -1209,6 +1209,16 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st } } +static unsigned +ir3_align_constoff(struct ir3_const_state *const_state, unsigned constoff, + unsigned aligment) +{ + constoff = align(constoff, aligment); + const_state->required_consts_aligment_vec4 = + MAX2(const_state->required_consts_aligment_vec4, aligment); + return constoff; +} + /* Sets up the variant-dependent constant state for the ir3_shader. 
Note * that it is also used from ir3_nir_analyze_ubo_ranges() to figure out the * maximum number of driver params that would eventually be used, to leave @@ -1221,6 +1231,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, struct ir3_compiler *compiler = v->compiler; memset(&const_state->offsets, ~0, sizeof(const_state->offsets)); + const_state->required_consts_aligment_vec4 = 1; ir3_nir_scan_driver_consts(compiler, nir, const_state); @@ -1270,7 +1281,8 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, /* offset cannot be 0 for vs params loaded by CP_DRAW_INDIRECT_MULTI */ if (v->type == MESA_SHADER_VERTEX && compiler->gen >= 6) constoff = MAX2(constoff, 1); - constoff = align(constoff, upload_unit); + constoff = ir3_align_constoff(const_state, constoff, upload_unit); + const_state->offsets.driver_param = constoff; constoff += align(const_state->num_driver_params / 4, upload_unit); @@ -1322,3 +1334,14 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, assert(constoff <= ir3_max_const(v)); } + +uint32_t +ir3_const_state_get_free_space(const struct ir3_shader_variant *v, + const struct ir3_const_state *const_state) +{ + uint32_t free_space_vec4 = ir3_max_const(v) - const_state->offsets.immediate; + free_space_vec4 = + (free_space_vec4 / const_state->required_consts_aligment_vec4) * + const_state->required_consts_aligment_vec4; + return free_space_vec4; +} diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index 0a8a44d3263..49368a74dce 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -63,6 +63,8 @@ void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s); void ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, struct ir3_const_state *const_state); +uint32_t ir3_const_state_get_free_space(const struct ir3_shader_variant *v, + const struct ir3_const_state *const_state); bool ir3_nir_lower_load_constant(nir_shader *nir, 
struct ir3_shader_variant *v); void ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v); bool ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v); diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c index 3b2a3564b1c..c22dacdb789 100644 --- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c +++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c @@ -560,7 +560,8 @@ ir3_nir_lower_const_global_loads(nir_shader *nir, struct ir3_shader_variant *v) .preamble_size = const_state->preamble_size, }; ir3_setup_const_state(nir, v, &worst_case_const_state); - max_upload = (ir3_max_const(v) - worst_case_const_state.offsets.immediate) * 16; + max_upload = + ir3_const_state_get_free_space(v, &worst_case_const_state) * 16; } struct ir3_ubo_analysis_state state = {}; @@ -636,7 +637,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v) }; ir3_setup_const_state(nir, v, &worst_case_const_state); const uint32_t max_upload = - (ir3_max_const(v) - worst_case_const_state.offsets.immediate) * 16; + ir3_const_state_get_free_space(v, &worst_case_const_state) * 16; memset(state, 0, sizeof(*state)); diff --git a/src/freedreno/ir3/ir3_nir_opt_preamble.c b/src/freedreno/ir3/ir3_nir_opt_preamble.c index 2c458fcd216..f04169c4ade 100644 --- a/src/freedreno/ir3/ir3_nir_opt_preamble.c +++ b/src/freedreno/ir3/ir3_nir_opt_preamble.c @@ -288,7 +288,7 @@ ir3_nir_opt_preamble(nir_shader *nir, struct ir3_shader_variant *v) } else { struct ir3_const_state worst_case_const_state = {}; ir3_setup_const_state(nir, v, &worst_case_const_state); - max_size = (ir3_max_const(v) - worst_case_const_state.offsets.immediate) * 4; + max_size = ir3_const_state_get_free_space(v, &worst_case_const_state) * 4; } if (max_size == 0) diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index b803982182c..ec513c92461 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ 
b/src/freedreno/ir3/ir3_shader.h @@ -186,6 +186,12 @@ struct ir3_const_state { struct ir3_driver_ubo driver_params_ubo; struct ir3_driver_ubo primitive_map_ubo, primitive_param_ubo; + /* Optional const allocations (preamble, UBO, etc.) may shift the required + * consts more than they expect. The free space for optional allocations + * should respect required_consts_aligment_vec4. + */ + uint32_t required_consts_aligment_vec4; + int32_t constant_data_dynamic_offsets; struct {