mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 17:50:12 +01:00
ir3: Respect alignment of required consts when allocating ubo, preamble
Optional const allocations (preamble, UBO, etc.) may shift the required consts (e.g. driver params) more than they expect. The free space for optional allocations should respect the alignment of the required consts that come after them. Example: there are at most 100 vec4 consts, driver params take 4 units, and something else takes 1 unit. The preamble then thinks that there are 95 free units; however, driver params cannot start at offset=95 because they have a higher alignment requirement. Fixes some d3d12 games. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31027>
This commit is contained in:
parent
1d71557397
commit
127f67a66a
5 changed files with 36 additions and 4 deletions
|
|
@ -1209,6 +1209,16 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned
|
||||||
|
ir3_align_constoff(struct ir3_const_state *const_state, unsigned constoff,
|
||||||
|
unsigned aligment)
|
||||||
|
{
|
||||||
|
constoff = align(constoff, aligment);
|
||||||
|
const_state->required_consts_aligment_vec4 =
|
||||||
|
MAX2(const_state->required_consts_aligment_vec4, aligment);
|
||||||
|
return constoff;
|
||||||
|
}
|
||||||
|
|
||||||
/* Sets up the variant-dependent constant state for the ir3_shader. Note
|
/* Sets up the variant-dependent constant state for the ir3_shader. Note
|
||||||
* that it is also used from ir3_nir_analyze_ubo_ranges() to figure out the
|
* that it is also used from ir3_nir_analyze_ubo_ranges() to figure out the
|
||||||
* maximum number of driver params that would eventually be used, to leave
|
* maximum number of driver params that would eventually be used, to leave
|
||||||
|
|
@ -1221,6 +1231,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
|
||||||
struct ir3_compiler *compiler = v->compiler;
|
struct ir3_compiler *compiler = v->compiler;
|
||||||
|
|
||||||
memset(&const_state->offsets, ~0, sizeof(const_state->offsets));
|
memset(&const_state->offsets, ~0, sizeof(const_state->offsets));
|
||||||
|
const_state->required_consts_aligment_vec4 = 1;
|
||||||
|
|
||||||
ir3_nir_scan_driver_consts(compiler, nir, const_state);
|
ir3_nir_scan_driver_consts(compiler, nir, const_state);
|
||||||
|
|
||||||
|
|
@ -1270,7 +1281,8 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
|
||||||
/* offset cannot be 0 for vs params loaded by CP_DRAW_INDIRECT_MULTI */
|
/* offset cannot be 0 for vs params loaded by CP_DRAW_INDIRECT_MULTI */
|
||||||
if (v->type == MESA_SHADER_VERTEX && compiler->gen >= 6)
|
if (v->type == MESA_SHADER_VERTEX && compiler->gen >= 6)
|
||||||
constoff = MAX2(constoff, 1);
|
constoff = MAX2(constoff, 1);
|
||||||
constoff = align(constoff, upload_unit);
|
constoff = ir3_align_constoff(const_state, constoff, upload_unit);
|
||||||
|
|
||||||
const_state->offsets.driver_param = constoff;
|
const_state->offsets.driver_param = constoff;
|
||||||
|
|
||||||
constoff += align(const_state->num_driver_params / 4, upload_unit);
|
constoff += align(const_state->num_driver_params / 4, upload_unit);
|
||||||
|
|
@ -1322,3 +1334,14 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
|
||||||
|
|
||||||
assert(constoff <= ir3_max_const(v));
|
assert(constoff <= ir3_max_const(v));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t
|
||||||
|
ir3_const_state_get_free_space(const struct ir3_shader_variant *v,
|
||||||
|
const struct ir3_const_state *const_state)
|
||||||
|
{
|
||||||
|
uint32_t free_space_vec4 = ir3_max_const(v) - const_state->offsets.immediate;
|
||||||
|
free_space_vec4 =
|
||||||
|
(free_space_vec4 / const_state->required_consts_aligment_vec4) *
|
||||||
|
const_state->required_consts_aligment_vec4;
|
||||||
|
return free_space_vec4;
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -63,6 +63,8 @@ void ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s);
|
||||||
|
|
||||||
void ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
|
void ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
|
||||||
struct ir3_const_state *const_state);
|
struct ir3_const_state *const_state);
|
||||||
|
uint32_t ir3_const_state_get_free_space(const struct ir3_shader_variant *v,
|
||||||
|
const struct ir3_const_state *const_state);
|
||||||
bool ir3_nir_lower_load_constant(nir_shader *nir, struct ir3_shader_variant *v);
|
bool ir3_nir_lower_load_constant(nir_shader *nir, struct ir3_shader_variant *v);
|
||||||
void ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v);
|
void ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v);
|
||||||
bool ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v);
|
bool ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v);
|
||||||
|
|
|
||||||
|
|
@ -560,7 +560,8 @@ ir3_nir_lower_const_global_loads(nir_shader *nir, struct ir3_shader_variant *v)
|
||||||
.preamble_size = const_state->preamble_size,
|
.preamble_size = const_state->preamble_size,
|
||||||
};
|
};
|
||||||
ir3_setup_const_state(nir, v, &worst_case_const_state);
|
ir3_setup_const_state(nir, v, &worst_case_const_state);
|
||||||
max_upload = (ir3_max_const(v) - worst_case_const_state.offsets.immediate) * 16;
|
max_upload =
|
||||||
|
ir3_const_state_get_free_space(v, &worst_case_const_state) * 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ir3_ubo_analysis_state state = {};
|
struct ir3_ubo_analysis_state state = {};
|
||||||
|
|
@ -636,7 +637,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
|
||||||
};
|
};
|
||||||
ir3_setup_const_state(nir, v, &worst_case_const_state);
|
ir3_setup_const_state(nir, v, &worst_case_const_state);
|
||||||
const uint32_t max_upload =
|
const uint32_t max_upload =
|
||||||
(ir3_max_const(v) - worst_case_const_state.offsets.immediate) * 16;
|
ir3_const_state_get_free_space(v, &worst_case_const_state) * 16;
|
||||||
|
|
||||||
memset(state, 0, sizeof(*state));
|
memset(state, 0, sizeof(*state));
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -288,7 +288,7 @@ ir3_nir_opt_preamble(nir_shader *nir, struct ir3_shader_variant *v)
|
||||||
} else {
|
} else {
|
||||||
struct ir3_const_state worst_case_const_state = {};
|
struct ir3_const_state worst_case_const_state = {};
|
||||||
ir3_setup_const_state(nir, v, &worst_case_const_state);
|
ir3_setup_const_state(nir, v, &worst_case_const_state);
|
||||||
max_size = (ir3_max_const(v) - worst_case_const_state.offsets.immediate) * 4;
|
max_size = ir3_const_state_get_free_space(v, &worst_case_const_state) * 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (max_size == 0)
|
if (max_size == 0)
|
||||||
|
|
|
||||||
|
|
@ -186,6 +186,12 @@ struct ir3_const_state {
|
||||||
struct ir3_driver_ubo driver_params_ubo;
|
struct ir3_driver_ubo driver_params_ubo;
|
||||||
struct ir3_driver_ubo primitive_map_ubo, primitive_param_ubo;
|
struct ir3_driver_ubo primitive_map_ubo, primitive_param_ubo;
|
||||||
|
|
||||||
|
/* Optional const allocations (preamble, UBO, etc.) may shift the required
|
||||||
|
* consts more than they expect. The free space for optional allocations
|
||||||
|
* should respect required_consts_aligment_vec4.
|
||||||
|
*/
|
||||||
|
uint32_t required_consts_aligment_vec4;
|
||||||
|
|
||||||
int32_t constant_data_dynamic_offsets;
|
int32_t constant_data_dynamic_offsets;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue