mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-06 00:10:20 +01:00
ir3: Support variants with different constlen's
This provides the mechanism for compiling variants with a reduced constlen. The next patch provides the policy for choosing which to reduce. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5607>
This commit is contained in:
parent
4554b946c3
commit
9edff0cfd4
7 changed files with 83 additions and 12 deletions
|
|
@ -65,6 +65,36 @@ struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id
|
|||
if (compiler->gpu_id >= 600) {
|
||||
compiler->mergedregs_set = ir3_ra_alloc_reg_set(compiler, true);
|
||||
compiler->samgq_workaround = true;
|
||||
/* a6xx split the pipeline state into geometry and fragment state, in
|
||||
* order to let the VS run ahead of the FS. As a result there are now
|
||||
* separate const files for the fragment shader and everything
|
||||
* else, and separate limits. There seems to be a shared limit, but
|
||||
* it's higher than the vert or frag limits.
|
||||
*
|
||||
* TODO: The shared limit seems to be different on
|
||||
* different models.
|
||||
*/
|
||||
compiler->max_const_pipeline = 640;
|
||||
compiler->max_const_frag = 512;
|
||||
compiler->max_const_geom = 512;
|
||||
compiler->max_const_safe = 128;
|
||||
|
||||
/* Compute shaders don't share a const file with the FS. Instead they
|
||||
* have their own file, which is smaller than the FS one.
|
||||
*
|
||||
* TODO: is this true on earlier gen's?
|
||||
*/
|
||||
compiler->max_const_compute = 256;
|
||||
} else {
|
||||
compiler->max_const_pipeline = 512;
|
||||
compiler->max_const_geom = 512;
|
||||
compiler->max_const_frag = 512;
|
||||
compiler->max_const_compute = 512;
|
||||
|
||||
/* Note: this will have to change if/when we support tess+GS on
|
||||
* earlier gen's.
|
||||
*/
|
||||
compiler->max_const_safe = 256;
|
||||
}
|
||||
|
||||
if (compiler->gpu_id >= 400) {
|
||||
|
|
@ -74,10 +104,6 @@ struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id
|
|||
compiler->unminify_coords = false;
|
||||
compiler->txf_ms_with_isaml = false;
|
||||
compiler->array_index_add_half = true;
|
||||
/* Some a6xxs can apparently do 640 consts, but not all. Need to
|
||||
* characterize this better across GPUs
|
||||
*/
|
||||
compiler->max_const = 512;
|
||||
compiler->const_upload_unit = 4;
|
||||
} else {
|
||||
/* no special handling for "flat" */
|
||||
|
|
@ -86,7 +112,6 @@ struct ir3_compiler * ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id
|
|||
compiler->unminify_coords = true;
|
||||
compiler->txf_ms_with_isaml = true;
|
||||
compiler->array_index_add_half = false;
|
||||
compiler->max_const = 512;
|
||||
compiler->const_upload_unit = 8;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -70,10 +70,24 @@ struct ir3_compiler {
|
|||
*/
|
||||
bool samgq_workaround;
|
||||
|
||||
/* on a3xx, the limit on const access is lower than later gens (in vec4
|
||||
* units):
|
||||
/* The maximum number of constants, in vec4's, across the entire graphics
|
||||
* pipeline.
|
||||
*/
|
||||
uint32_t max_const;
|
||||
uint16_t max_const_pipeline;
|
||||
|
||||
/* The maximum number of constants, in vec4's, for VS+HS+DS+GS. */
|
||||
uint16_t max_const_geom;
|
||||
|
||||
/* The maximum number of constants, in vec4's, for FS. */
|
||||
uint16_t max_const_frag;
|
||||
|
||||
/* A "safe" max constlen that can be applied to each shader in the
|
||||
* pipeline which we guarantee will never exceed any combined limits.
|
||||
*/
|
||||
uint16_t max_const_safe;
|
||||
|
||||
/* The maximum number of constants, in vec4's, for compute shaders. */
|
||||
uint16_t max_const_compute;
|
||||
|
||||
/* on a3xx, the unit of indirect const load is higher than later gens (in
|
||||
* vec4 units):
|
||||
|
|
|
|||
|
|
@ -220,12 +220,11 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
|
|||
}
|
||||
|
||||
if (i == const_state->immediate_idx) {
|
||||
struct ir3_compiler *compiler = instr->block->shader->compiler;
|
||||
/* Add on a new immediate to be pushed, if we have space left in the
|
||||
* constbuf.
|
||||
*/
|
||||
if (const_state->offsets.immediate + const_state->immediate_idx / 4 >=
|
||||
compiler->max_const)
|
||||
ir3_max_const(ctx->so))
|
||||
return false;
|
||||
|
||||
swiz = i % 4;
|
||||
|
|
|
|||
|
|
@ -552,5 +552,5 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
|
|||
|
||||
const_state->offsets.immediate = constoff;
|
||||
|
||||
assert(constoff <= compiler->max_const);
|
||||
assert(constoff <= ir3_max_const(v));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -372,7 +372,7 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
|
|||
*/
|
||||
struct ir3_const_state worst_case_const_state = { };
|
||||
ir3_setup_const_state(nir, v, &worst_case_const_state);
|
||||
const uint32_t max_upload = (compiler->max_const -
|
||||
const uint32_t max_upload = (ir3_max_const(v) -
|
||||
worst_case_const_state.offsets.immediate) * 16;
|
||||
|
||||
uint32_t offset = v->shader->num_reserved_user_consts * 16;
|
||||
|
|
|
|||
|
|
@ -285,6 +285,8 @@ ir3_setup_used_key(struct ir3_shader *shader)
|
|||
*/
|
||||
key->has_per_samp = true;
|
||||
|
||||
key->safe_constlen = true;
|
||||
|
||||
if (info->stage == MESA_SHADER_FRAGMENT) {
|
||||
key->fsaturate_s = ~0;
|
||||
key->fsaturate_t = ~0;
|
||||
|
|
|
|||
|
|
@ -299,6 +299,12 @@ struct ir3_shader_key {
|
|||
unsigned tessellation : 2;
|
||||
|
||||
unsigned has_gs : 1;
|
||||
|
||||
/* Whether this variant sticks to the "safe" maximum constlen,
|
||||
* which guarantees that the combined stages will never go over
|
||||
* the limit:
|
||||
*/
|
||||
unsigned safe_constlen : 1;
|
||||
};
|
||||
uint32_t global;
|
||||
};
|
||||
|
|
@ -369,6 +375,9 @@ ir3_shader_key_changes_fs(struct ir3_shader_key *key, struct ir3_shader_key *las
|
|||
if (last_key->ucp_enables != key->ucp_enables)
|
||||
return true;
|
||||
|
||||
if (last_key->safe_constlen != key->safe_constlen)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -391,6 +400,9 @@ ir3_shader_key_changes_vs(struct ir3_shader_key *key, struct ir3_shader_key *las
|
|||
if (last_key->ucp_enables != key->ucp_enables)
|
||||
return true;
|
||||
|
||||
if (last_key->safe_constlen != key->safe_constlen)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -668,6 +680,25 @@ ir3_const_state(const struct ir3_shader_variant *v)
|
|||
return v->const_state;
|
||||
}
|
||||
|
||||
/* Given a variant, calculate the maximum constlen it can have.
|
||||
*/
|
||||
|
||||
static inline unsigned
|
||||
ir3_max_const(const struct ir3_shader_variant *v)
|
||||
{ /* Selects one of the compiler's max_const_* limits from the shader type and the variant key. */
|
||||
const struct ir3_compiler *compiler = v->shader->compiler;
|
||||
|
||||
if (v->shader->type == MESA_SHADER_COMPUTE) { /* tested first, so key.safe_constlen is never consulted for compute */
|
||||
return compiler->max_const_compute;
|
||||
} else if (v->key.safe_constlen) { /* applies to every non-compute type, fragment included */
|
||||
return compiler->max_const_safe;
|
||||
} else if (v->shader->type == MESA_SHADER_FRAGMENT) { /* fragment variant without safe_constlen */
|
||||
return compiler->max_const_frag;
|
||||
} else { /* every remaining shader type falls back to the geometry limit */
|
||||
return compiler->max_const_geom;
|
||||
}
|
||||
}
|
||||
|
||||
void * ir3_shader_assemble(struct ir3_shader_variant *v);
|
||||
struct ir3_shader_variant * ir3_shader_get_variant(struct ir3_shader *shader,
|
||||
const struct ir3_shader_key *key, bool binning_pass, bool *created);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue