ir3: Respect alignment of required consts when allocating ubo, preamble

Optional const allocations (preamble, UBO, etc.) may shift the required
consts (e.g. driver params) more than they expect.
The free space for optional allocations should respect the alignment of
the required consts that come after them.

Example: there are at most 100 vec4 consts, driver params take 4 units,
and something else takes 1 unit. Now the preamble thinks that there are
95 free units; however, driver params cannot start at offset=95 because
they have a higher alignment.

Fixes some d3d12 games.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31027>
This commit is contained in:
Danylo Piliaiev 2024-09-04 18:32:13 +02:00 committed by Marge Bot
parent 1d71557397
commit 127f67a66a
5 changed files with 36 additions and 4 deletions

View file

@ -1209,6 +1209,16 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st
}
}
/* Rounds constoff up to the requested alignment and remembers the largest
 * alignment requested so far in const_state->required_consts_aligment_vec4,
 * so that ir3_const_state_get_free_space() can take it into account.
 *
 * Returns the aligned offset.
 */
static unsigned
ir3_align_constoff(struct ir3_const_state *const_state, unsigned constoff,
                   unsigned aligment)
{
   const unsigned aligned_constoff = align(constoff, aligment);

   /* Track the strictest alignment any required const has asked for. */
   if (const_state->required_consts_aligment_vec4 < aligment)
      const_state->required_consts_aligment_vec4 = aligment;

   return aligned_constoff;
}
/* Sets up the variant-dependent constant state for the ir3_shader. Note
* that it is also used from ir3_nir_analyze_ubo_ranges() to figure out the
* maximum number of driver params that would eventually be used, to leave
@ -1221,6 +1231,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
struct ir3_compiler *compiler = v->compiler;
memset(&const_state->offsets, ~0, sizeof(const_state->offsets));
const_state->required_consts_aligment_vec4 = 1;
ir3_nir_scan_driver_consts(compiler, nir, const_state);
@ -1270,7 +1281,8 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
/* offset cannot be 0 for vs params loaded by CP_DRAW_INDIRECT_MULTI */
if (v->type == MESA_SHADER_VERTEX && compiler->gen >= 6)
constoff = MAX2(constoff, 1);
constoff = align(constoff, upload_unit);
constoff = ir3_align_constoff(const_state, constoff, upload_unit);
const_state->offsets.driver_param = constoff;
constoff += align(const_state->num_driver_params / 4, upload_unit);
@ -1322,3 +1334,14 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
assert(constoff <= ir3_max_const(v));
}
/* Returns the number of const slots between const_state->offsets.immediate
 * and ir3_max_const(v), rounded down to a multiple of
 * const_state->required_consts_aligment_vec4.
 *
 * Optional allocations (preamble, UBO, etc.) are sized from this value so
 * that the required consts placed after them can still be aligned.
 */
uint32_t
ir3_const_state_get_free_space(const struct ir3_shader_variant *v,
                               const struct ir3_const_state *const_state)
{
   const uint32_t unused_vec4 =
      ir3_max_const(v) - const_state->offsets.immediate;
   const uint32_t alignment_vec4 = const_state->required_consts_aligment_vec4;

   /* Drop the tail that does not fill a whole alignment unit. */
   return unused_vec4 - (unused_vec4 % alignment_vec4);
}

View file

@ -63,6 +63,8 @@ void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s);
void ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
struct ir3_const_state *const_state);
/* Returns the const space left after const_state->offsets.immediate, up to
 * ir3_max_const(v), rounded down to a multiple of
 * const_state->required_consts_aligment_vec4.
 */
uint32_t ir3_const_state_get_free_space(const struct ir3_shader_variant *v,
const struct ir3_const_state *const_state);
bool ir3_nir_lower_load_constant(nir_shader *nir, struct ir3_shader_variant *v);
void ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v);
bool ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v);

View file

@ -560,7 +560,8 @@ ir3_nir_lower_const_global_loads(nir_shader *nir, struct ir3_shader_variant *v)
.preamble_size = const_state->preamble_size,
};
ir3_setup_const_state(nir, v, &worst_case_const_state);
max_upload = (ir3_max_const(v) - worst_case_const_state.offsets.immediate) * 16;
max_upload =
ir3_const_state_get_free_space(v, &worst_case_const_state) * 16;
}
struct ir3_ubo_analysis_state state = {};
@ -636,7 +637,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
};
ir3_setup_const_state(nir, v, &worst_case_const_state);
const uint32_t max_upload =
(ir3_max_const(v) - worst_case_const_state.offsets.immediate) * 16;
ir3_const_state_get_free_space(v, &worst_case_const_state) * 16;
memset(state, 0, sizeof(*state));

View file

@ -288,7 +288,7 @@ ir3_nir_opt_preamble(nir_shader *nir, struct ir3_shader_variant *v)
} else {
struct ir3_const_state worst_case_const_state = {};
ir3_setup_const_state(nir, v, &worst_case_const_state);
max_size = (ir3_max_const(v) - worst_case_const_state.offsets.immediate) * 4;
max_size = ir3_const_state_get_free_space(v, &worst_case_const_state) * 4;
}
if (max_size == 0)

View file

@ -186,6 +186,12 @@ struct ir3_const_state {
struct ir3_driver_ubo driver_params_ubo;
struct ir3_driver_ubo primitive_map_ubo, primitive_param_ubo;
/* Optional const allocations (preamble, UBO, etc.) may shift the required
* consts more than they expect. The free space for optional allocations
* should respect required_consts_aligment_vec4.
*/
uint32_t required_consts_aligment_vec4;
int32_t constant_data_dynamic_offsets;
struct {