diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index cbc4704cedb..051033190d7 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -1209,6 +1209,16 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st } } +static unsigned +ir3_align_constoff(struct ir3_const_state *const_state, unsigned constoff, + unsigned aligment) +{ + constoff = align(constoff, aligment); + const_state->required_consts_aligment_vec4 = + MAX2(const_state->required_consts_aligment_vec4, aligment); + return constoff; +} + /* Sets up the variant-dependent constant state for the ir3_shader. Note * that it is also used from ir3_nir_analyze_ubo_ranges() to figure out the * maximum number of driver params that would eventually be used, to leave @@ -1221,6 +1231,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, struct ir3_compiler *compiler = v->compiler; memset(&const_state->offsets, ~0, sizeof(const_state->offsets)); + const_state->required_consts_aligment_vec4 = 1; ir3_nir_scan_driver_consts(compiler, nir, const_state); @@ -1270,7 +1281,8 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, /* offset cannot be 0 for vs params loaded by CP_DRAW_INDIRECT_MULTI */ if (v->type == MESA_SHADER_VERTEX && compiler->gen >= 6) constoff = MAX2(constoff, 1); - constoff = align(constoff, upload_unit); + constoff = ir3_align_constoff(const_state, constoff, upload_unit); + const_state->offsets.driver_param = constoff; constoff += align(const_state->num_driver_params / 4, upload_unit); @@ -1322,3 +1334,14 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, assert(constoff <= ir3_max_const(v)); } + +uint32_t +ir3_const_state_get_free_space(const struct ir3_shader_variant *v, + const struct ir3_const_state *const_state) +{ + uint32_t free_space_vec4 = ir3_max_const(v) - const_state->offsets.immediate; + free_space_vec4 = + (free_space_vec4 / 
const_state->required_consts_aligment_vec4) * + const_state->required_consts_aligment_vec4; + return free_space_vec4; +} diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index 0a8a44d3263..49368a74dce 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -63,6 +63,8 @@ void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s); void ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v, struct ir3_const_state *const_state); +uint32_t ir3_const_state_get_free_space(const struct ir3_shader_variant *v, + const struct ir3_const_state *const_state); bool ir3_nir_lower_load_constant(nir_shader *nir, struct ir3_shader_variant *v); void ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v); bool ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v); diff --git a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c index 3b2a3564b1c..c22dacdb789 100644 --- a/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c +++ b/src/freedreno/ir3/ir3_nir_analyze_ubo_ranges.c @@ -560,7 +560,8 @@ ir3_nir_lower_const_global_loads(nir_shader *nir, struct ir3_shader_variant *v) .preamble_size = const_state->preamble_size, }; ir3_setup_const_state(nir, v, &worst_case_const_state); - max_upload = (ir3_max_const(v) - worst_case_const_state.offsets.immediate) * 16; + max_upload = + ir3_const_state_get_free_space(v, &worst_case_const_state) * 16; } struct ir3_ubo_analysis_state state = {}; @@ -636,7 +637,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v) }; ir3_setup_const_state(nir, v, &worst_case_const_state); const uint32_t max_upload = - (ir3_max_const(v) - worst_case_const_state.offsets.immediate) * 16; + ir3_const_state_get_free_space(v, &worst_case_const_state) * 16; memset(state, 0, sizeof(*state)); diff --git a/src/freedreno/ir3/ir3_nir_opt_preamble.c b/src/freedreno/ir3/ir3_nir_opt_preamble.c index 
2c458fcd216..f04169c4ade 100644 --- a/src/freedreno/ir3/ir3_nir_opt_preamble.c +++ b/src/freedreno/ir3/ir3_nir_opt_preamble.c @@ -288,7 +288,7 @@ ir3_nir_opt_preamble(nir_shader *nir, struct ir3_shader_variant *v) } else { struct ir3_const_state worst_case_const_state = {}; ir3_setup_const_state(nir, v, &worst_case_const_state); - max_size = (ir3_max_const(v) - worst_case_const_state.offsets.immediate) * 4; + max_size = ir3_const_state_get_free_space(v, &worst_case_const_state) * 4; } if (max_size == 0) diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index b803982182c..ec513c92461 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -186,6 +186,12 @@ struct ir3_const_state { struct ir3_driver_ubo driver_params_ubo; struct ir3_driver_ubo primitive_map_ubo, primitive_param_ubo; + /* Optional const allocations (preamble, UBO, etc.) may shift the required + * consts by more than their own size because of alignment. The free space + * for optional allocations must be a multiple of this alignment (in vec4). + */ + uint32_t required_consts_aligment_vec4; + int32_t constant_data_dynamic_offsets; struct {