ir3: Make allocation of consts more generic and order independent

The order of allocation was baked into ir3_setup_const_state and
some other parts of ir3, which is rather brittle.

And don't assume offsets for consts in other parts of the code, since
their order and offset calculation are not guaranteed.

This also potentially fixes the constlen computed when UBOs are
accessed indirectly.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32140>
Danylo Piliaiev 2024-10-24 19:18:16 +02:00 committed by Marge Bot
parent fc56823cf0
commit 922ef8e720
11 changed files with 313 additions and 87 deletions
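
For readers skimming the diff, here is a minimal sketch of how the pieces
introduced by this commit are meant to fit together. The call sites, sizes
and ordering below are illustrative only (the real ones are in the passes
changed further down), and it assumes the declarations added by this series
are in scope:

/* Illustrative only: lifecycle of struct ir3_const_allocations.
 * Sizes and the exact order of calls are invented for the example.
 */
static void
example_const_alloc_flow(struct ir3_shader_variant *v)
{
   struct ir3_const_allocations *allocs = &ir3_const_state_mut(v)->allocs;

   /* 1. Reserve space whose final size is not yet known. A reservation
    *    places nothing; it only (pessimistically, including worst-case
    *    alignment padding) shrinks the budget reported by
    *    ir3_const_state_get_free_space().
    */
   ir3_const_reserve_space(allocs, IR3_CONST_ALLOC_UBO_PTRS, 4 /* vec4 */, 1);

   /* 2. Passes allocate concrete ranges as sizes become known. Each type
    *    gets an offset_vec4/size_vec4 pair; offsets follow call order, so
    *    nothing may assume a fixed layout anymore.
    */
   ir3_const_alloc(allocs, IR3_CONST_ALLOC_UBO_RANGES, 8 /* vec4 */, 1);

   /* 3. A reservation can be dropped and re-allocated with its real size. */
   ir3_const_free_reserved_space(allocs, IR3_CONST_ALLOC_UBO_PTRS);
   ir3_const_alloc(allocs, IR3_CONST_ALLOC_UBO_PTRS, 2 /* vec4 */, 1);

   /* 4. Anything still only reserved is committed at the end (done in
    *    ir3_nir_lower_variant for the non-binning variant).
    */
   ir3_const_alloc_all_reserved_space(allocs);

   /* 5. Consumers look offsets up by type instead of hard-coding them. */
   unsigned ubo_ptr_reg =
      ir3_const_reg(ir3_const_state(v), IR3_CONST_ALLOC_UBO_PTRS, 0);
   (void)ubo_ptr_reg;
}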

View file

@ -1263,7 +1263,7 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_builder *b = &ctx->build;
struct ir3_instruction *base_lo, *base_hi, *addr, *src0, *src1;
const struct ir3_const_state *const_state = ir3_const_state(ctx->so);
unsigned ubo = regid(const_state->offsets.ubo, 0);
unsigned ubo = ir3_const_reg(const_state, IR3_CONST_ALLOC_UBO_PTRS, 0);
const unsigned ptrsz = ir3_pointer_size(ctx->compiler);
int off = 0;
@ -1283,9 +1283,10 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
* at least big enough to cover all the UBO addresses, since the
* assembler won't know what the max address reg is.
*/
ctx->so->constlen =
MAX2(ctx->so->constlen,
const_state->offsets.ubo + (ctx->s->info.num_ubos * ptrsz));
ctx->so->constlen = MAX2(
ctx->so->constlen,
const_state->allocs.consts[IR3_CONST_ALLOC_UBO_PTRS].offset_vec4 +
(ctx->s->info.num_ubos * ptrsz));
}
/* note: on 32bit gpu's base_hi is ignored and DCE'd */
@ -2686,15 +2687,8 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
if (ctx->compiler->has_scalar_alu && !intr->def.divergent)
dst[i]->dsts[0]->flags |= IR3_REG_SHARED;
}
/* NOTE: if relative addressing is used, we set
* constlen in the compiler (to worst-case value)
* since we don't know in the assembler what the max
* addr reg value can be:
*/
ctx->so->constlen =
MAX2(ctx->so->constlen,
ctx->so->shader_options.num_reserved_user_consts +
const_state->ubo_state.size / 16);
ctx->has_relative_load_const_ir3 = true;
}
break;
@ -5942,6 +5936,27 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
ctx->so->per_samp = ctx->s->info.fs.uses_sample_shading;
if (ctx->has_relative_load_const_ir3) {
/* NOTE: if relative addressing is used, we set
* constlen in the compiler (to worst-case value)
* since we don't know in the assembler what the max
* addr reg value can be:
*/
const struct ir3_const_state *const_state = ir3_const_state(ctx->so);
const enum ir3_const_alloc_type rel_const_srcs[] = {
IR3_CONST_ALLOC_INLINE_UNIFORM_ADDRS, IR3_CONST_ALLOC_UBO_RANGES,
IR3_CONST_ALLOC_PREAMBLE, IR3_CONST_ALLOC_GLOBAL};
for (int i = 0; i < ARRAY_SIZE(rel_const_srcs); i++) {
const struct ir3_const_allocation *const_alloc =
&const_state->allocs.consts[rel_const_srcs[i]];
if (const_alloc->size_vec4 > 0) {
ctx->so->constlen =
MAX2(ctx->so->constlen,
const_alloc->offset_vec4 + const_alloc->size_vec4);
}
}
}
if (ctx->so->type == MESA_SHADER_FRAGMENT &&
compiler->fs_must_have_non_zero_constlen_quirk) {
so->constlen = MAX2(so->constlen, 4);

View file

@ -141,6 +141,8 @@ struct ir3_context {
unsigned prefetch_limit;
bool has_relative_load_const_ir3;
/* set if we encounter something we can't handle yet, so we
* can bail cleanly and fallback to TGSI compiler f/e
*/

View file

@ -1151,6 +1151,10 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so,
progress |= OPT(s, ir3_nir_lower_io_offsets);
if (!so->binning_pass) {
ir3_const_alloc_all_reserved_space(&ir3_const_state_mut(so)->allocs);
}
if (progress)
ir3_optimize_loop(so->compiler, options, s);
@ -1364,6 +1368,61 @@ ir3_align_constoff(struct ir3_const_state *const_state, unsigned constoff,
return constoff;
}
void
ir3_const_alloc(struct ir3_const_allocations *const_alloc,
enum ir3_const_alloc_type type, uint32_t size_vec4,
uint32_t align_vec4)
{
struct ir3_const_allocation *alloc = &const_alloc->consts[type];
assert(alloc->size_vec4 == 0);
const_alloc->max_const_offset_vec4 =
align(const_alloc->max_const_offset_vec4, align_vec4);
alloc->size_vec4 = size_vec4;
alloc->offset_vec4 = const_alloc->max_const_offset_vec4;
const_alloc->max_const_offset_vec4 += size_vec4;
}
void
ir3_const_reserve_space(struct ir3_const_allocations *const_alloc,
enum ir3_const_alloc_type type, uint32_t size_vec4,
uint32_t align_vec4)
{
struct ir3_const_allocation *alloc = &const_alloc->consts[type];
assert(alloc->size_vec4 == 0 && alloc->reserved_size_vec4 == 0);
alloc->reserved_size_vec4 = size_vec4;
alloc->reserved_align_vec4 = align_vec4;
/* Be pessimistic here and assume the worst case alignment is needed */
const_alloc->reserved_vec4 += size_vec4 + align_vec4 - 1;
}
void
ir3_const_free_reserved_space(struct ir3_const_allocations *const_alloc,
enum ir3_const_alloc_type type)
{
struct ir3_const_allocation *alloc = &const_alloc->consts[type];
assert(const_alloc->reserved_vec4 >= alloc->reserved_size_vec4);
const_alloc->reserved_vec4 -=
alloc->reserved_size_vec4 + alloc->reserved_align_vec4 - 1;
alloc->reserved_size_vec4 = 0;
}
void
ir3_const_alloc_all_reserved_space(struct ir3_const_allocations *const_alloc)
{
for (int i = 0; i < IR3_CONST_ALLOC_MAX; i++) {
if (const_alloc->consts[i].reserved_size_vec4 > 0) {
ir3_const_alloc(const_alloc, i,
const_alloc->consts[i].reserved_size_vec4,
const_alloc->consts[i].reserved_align_vec4);
const_alloc->consts[i].reserved_size_vec4 = 0;
}
}
const_alloc->reserved_vec4 = 0;
}
/* Sets up the variant-dependent constant state for the ir3_shader. Note
* that it is also used from ir3_nir_analyze_ubo_ranges() to figure out the
* maximum number of driver params that would eventually be used, to leave
@ -1374,6 +1433,7 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
struct ir3_const_state *const_state)
{
struct ir3_compiler *compiler = v->compiler;
unsigned ptrsz = ir3_pointer_size(compiler);
memset(&const_state->offsets, ~0, sizeof(const_state->offsets));
const_state->required_consts_aligment_vec4 = 1;
@ -1388,16 +1448,8 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
const_state->num_ubos = nir->info.num_ubos;
assert((const_state->ubo_state.size % 16) == 0);
unsigned constoff = v->shader_options.num_reserved_user_consts +
const_state->ubo_state.size / 16 +
const_state->preamble_size +
const_state->global_size;
unsigned ptrsz = ir3_pointer_size(compiler);
if (const_state->num_ubos > 0 && compiler->gen < 6) {
const_state->offsets.ubo = constoff;
constoff += align(const_state->num_ubos * ptrsz, 4) / 4;
}
unsigned constoff = const_state->allocs.max_const_offset_vec4;
if (const_state->image_dims.count > 0) {
unsigned cnt = const_state->image_dims.count;
@ -1482,9 +1534,12 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
uint32_t
ir3_const_state_get_free_space(const struct ir3_shader_variant *v,
const struct ir3_const_state *const_state)
const struct ir3_const_state *const_state,
uint32_t align_vec4)
{
uint32_t free_space_vec4 = ir3_max_const(v) - const_state->offsets.immediate;
uint32_t free_space_vec4 =
ir3_max_const(v) - align(const_state->offsets.immediate, align_vec4) -
const_state->allocs.reserved_vec4;
free_space_vec4 =
(free_space_vec4 / const_state->required_consts_aligment_vec4) *
const_state->required_consts_aligment_vec4;
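
As a worked example of the arithmetic in ir3_const_alloc() and
ir3_const_reserve_space() above, here is a hypothetical self-check; the
helpers and enum values come from this change, while the sizes and
alignments are invented:

#include <assert.h>
/* Assumes the ir3 headers providing struct ir3_const_allocations and the
 * allocator helpers are included. */
static void
example_alloc_math(void)
{
   struct ir3_const_allocations a = {0};

   ir3_const_alloc(&a, IR3_CONST_ALLOC_PUSH_CONSTS, 3, 1);
   assert(a.consts[IR3_CONST_ALLOC_PUSH_CONSTS].offset_vec4 == 0);
   assert(a.max_const_offset_vec4 == 3);

   /* An aligned allocation first rounds the running offset up. */
   ir3_const_alloc(&a, IR3_CONST_ALLOC_PREAMBLE, 8, 4);
   assert(a.consts[IR3_CONST_ALLOC_PREAMBLE].offset_vec4 == 4);
   assert(a.max_const_offset_vec4 == 12);

   /* A reservation places nothing; it only grows reserved_vec4 by
    * size + align - 1, i.e. worst-case padding is assumed. */
   ir3_const_reserve_space(&a, IR3_CONST_ALLOC_UBO_PTRS, 2, 4);
   assert(a.reserved_vec4 == 2 + 4 - 1);
   assert(a.max_const_offset_vec4 == 12);
}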

View file

@ -77,7 +77,18 @@ void ir3_nir_lower_variant(struct ir3_shader_variant *so,
void ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
struct ir3_const_state *const_state);
uint32_t ir3_const_state_get_free_space(const struct ir3_shader_variant *v,
const struct ir3_const_state *const_state);
const struct ir3_const_state *const_state,
uint32_t align_vec4);
void ir3_const_alloc(struct ir3_const_allocations *const_alloc,
enum ir3_const_alloc_type type, uint32_t size_vec4,
uint32_t align_vec4);
void ir3_const_reserve_space(struct ir3_const_allocations *const_alloc,
enum ir3_const_alloc_type type,
uint32_t size_vec4, uint32_t align_vec4);
void ir3_const_free_reserved_space(struct ir3_const_allocations *const_alloc,
enum ir3_const_alloc_type type);
void ir3_const_alloc_all_reserved_space(struct ir3_const_allocations *const_alloc);
bool ir3_nir_lower_load_constant(nir_shader *nir, struct ir3_shader_variant *v);
void ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v);
bool ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v);

View file

@ -553,15 +553,18 @@ ir3_nir_lower_const_global_loads(nir_shader *nir, struct ir3_shader_variant *v)
return false;
unsigned max_upload;
uint32_t global_offset = 0;
if (v->binning_pass) {
max_upload = const_state->global_size * 16;
} else {
struct ir3_const_state worst_case_const_state = {
.preamble_size = const_state->preamble_size,
};
ir3_setup_const_state(nir, v, &worst_case_const_state);
max_upload =
ir3_const_state_get_free_space(v, &worst_case_const_state) * 16;
const_state->allocs.consts[IR3_CONST_ALLOC_GLOBAL].size_vec4 * 16;
global_offset =
const_state->allocs.consts[IR3_CONST_ALLOC_GLOBAL].offset_vec4 * 16;
} else {
struct ir3_const_state *const_state = ir3_const_state_mut(v);
ir3_setup_const_state(nir, v, const_state);
global_offset = const_state->allocs.max_const_offset_vec4 * 16;
max_upload =
ir3_const_state_get_free_space(v, const_state, 1) * 16;
}
struct ir3_ubo_analysis_state state = {};
@ -581,7 +584,6 @@ ir3_nir_lower_const_global_loads(nir_shader *nir, struct ir3_shader_variant *v)
}
}
uint32_t global_offset = v->shader_options.num_reserved_user_consts * 16;
assign_offsets(&state, global_offset, max_upload);
bool progress = copy_global_to_uniform(nir, &state);
@ -612,8 +614,10 @@ ir3_nir_lower_const_global_loads(nir_shader *nir, struct ir3_shader_variant *v)
}
}
if (!v->binning_pass)
ir3_const_state_mut(v)->global_size = DIV_ROUND_UP(state.size, 16);
if (!v->binning_pass) {
ir3_const_alloc(&ir3_const_state_mut(v)->allocs, IR3_CONST_ALLOC_GLOBAL,
DIV_ROUND_UP(state.size, 16), 1);
}
return progress;
}
@ -625,19 +629,26 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
struct ir3_ubo_analysis_state *state = &const_state->ubo_state;
struct ir3_compiler *compiler = v->compiler;
if (compiler->gen < 6 && const_state->num_ubos > 0) {
uint32_t ptrs_vec4 =
align(const_state->num_ubos * ir3_pointer_size(compiler), 4) / 4;
ir3_const_reserve_space(&const_state->allocs, IR3_CONST_ALLOC_UBO_PTRS,
ptrs_vec4, 1);
}
uint32_t align_vec4 = compiler->load_shader_consts_via_preamble
? 1
: compiler->const_upload_unit;
/* Limit our uploads to the amount of constant buffer space available in
* the hardware, minus what the shader compiler may need for various
driver params. We do this UBO-to-push-constant lowering before the real
allocation of the driver params' const space, because UBO pointers can
be driver params but this pass usually eliminates them.
*/
struct ir3_const_state worst_case_const_state = {
.preamble_size = const_state->preamble_size,
.global_size = const_state->global_size,
};
ir3_setup_const_state(nir, v, &worst_case_const_state);
ir3_setup_const_state(nir, v, const_state);
const uint32_t max_upload =
ir3_const_state_get_free_space(v, &worst_case_const_state) * 16;
ir3_const_state_get_free_space(v, const_state, align_vec4) * 16;
memset(state, 0, sizeof(*state));
@ -660,9 +671,13 @@ ir3_nir_analyze_ubo_ranges(nir_shader *nir, struct ir3_shader_variant *v)
}
}
uint32_t ubo_offset = v->shader_options.num_reserved_user_consts * 16 +
const_state->global_size * 16;
uint32_t ubo_offset =
  align(const_state->allocs.max_const_offset_vec4, align_vec4) * 16;
assign_offsets(state, ubo_offset, max_upload);
uint32_t upload_vec4 = state->size / 16;
if (upload_vec4 > 0)
ir3_const_alloc(&ir3_const_state_mut(v)->allocs,
IR3_CONST_ALLOC_UBO_RANGES, upload_vec4, align_vec4);
}
bool
@ -709,6 +724,22 @@ ir3_nir_lower_ubo_loads(nir_shader *nir, struct ir3_shader_variant *v)
if (nir->info.first_ubo_is_default_ubo && !push_ubos && !has_preamble)
nir->info.num_ubos = num_ubos;
if (!v->binning_pass) {
ir3_const_state_mut(v)->num_ubos = num_ubos;
if (compiler->gen < 6)
ir3_const_free_reserved_space(&ir3_const_state_mut(v)->allocs,
IR3_CONST_ALLOC_UBO_PTRS);
if (compiler->gen < 6 && const_state->num_ubos > 0) {
uint32_t upload_ptrs_vec4 =
align(const_state->num_ubos * ir3_pointer_size(compiler), 4) / 4;
ir3_const_alloc(&ir3_const_state_mut(v)->allocs,
IR3_CONST_ALLOC_UBO_PTRS, upload_ptrs_vec4, 1);
}
}
if (compiler->has_preamble && push_ubos)
progress |= copy_ubo_to_uniform(
nir, const_state, !compiler->load_shader_consts_via_preamble);

View file

@ -284,11 +284,13 @@ ir3_nir_opt_preamble(nir_shader *nir, struct ir3_shader_variant *v)
unsigned max_size;
if (v->binning_pass) {
const struct ir3_const_state *const_state = ir3_const_state(v);
max_size = const_state->preamble_size * 4;
max_size =
const_state->allocs.consts[IR3_CONST_ALLOC_PREAMBLE].size_vec4 * 4;
} else {
struct ir3_const_state worst_case_const_state = {};
ir3_setup_const_state(nir, v, &worst_case_const_state);
max_size = ir3_const_state_get_free_space(v, &worst_case_const_state) * 4;
struct ir3_const_state *const_state = ir3_const_state_mut(v);
ir3_setup_const_state(nir, v, const_state);
max_size = ir3_const_state_get_free_space(
v, const_state, v->compiler->const_upload_unit) * 4;
}
if (max_size == 0)
@ -312,8 +314,10 @@ ir3_nir_opt_preamble(nir_shader *nir, struct ir3_shader_variant *v)
progress |= nir_opt_preamble(nir, &options, &size);
if (!v->binning_pass) {
struct ir3_const_state *const_state = ir3_const_state_mut(v);
const_state->preamble_size = DIV_ROUND_UP(size, 4);
uint32_t preamble_size_vec4 =
align(DIV_ROUND_UP(size, 4), v->compiler->const_upload_unit);
ir3_const_alloc(&ir3_const_state_mut(v)->allocs, IR3_CONST_ALLOC_PREAMBLE,
preamble_size_vec4, v->compiler->const_upload_unit);
}
return progress;
@ -605,8 +609,9 @@ ir3_nir_opt_prefetch_descriptors(nir_shader *nir, struct ir3_shader_variant *v)
bool progress = false;
struct prefetch_state state = {};
nir_def **preamble_defs = calloc(const_state->preamble_size * 4,
sizeof(nir_def *));
nir_def **preamble_defs =
calloc(const_state->allocs.consts[IR3_CONST_ALLOC_PREAMBLE].size_vec4 * 4,
sizeof(nir_def *));
/* Collect preamble defs. This is useful if the computation of the offset has
* already been hoisted to the preamble.
@ -622,7 +627,9 @@ ir3_nir_opt_prefetch_descriptors(nir_shader *nir, struct ir3_shader_variant *v)
if (intrin->intrinsic != nir_intrinsic_store_preamble)
continue;
assert(nir_intrinsic_base(intrin) < const_state->preamble_size * 4);
assert(
nir_intrinsic_base(intrin) <
const_state->allocs.consts[IR3_CONST_ALLOC_PREAMBLE].size_vec4 * 4);
preamble_defs[nir_intrinsic_base(intrin)] = intrin->src[0].ssa;
}
}
@ -719,9 +726,10 @@ ir3_nir_lower_preamble(nir_shader *nir, struct ir3_shader_variant *v)
/* First, lower load/store_preamble. */
const struct ir3_const_state *const_state = ir3_const_state(v);
unsigned preamble_base = v->shader_options.num_reserved_user_consts * 4 +
const_state->ubo_state.size / 4 + const_state->global_size * 4;
unsigned preamble_size = const_state->preamble_size * 4;
unsigned preamble_base =
const_state->allocs.consts[IR3_CONST_ALLOC_PREAMBLE].offset_vec4 * 4;
unsigned preamble_size =
const_state->allocs.consts[IR3_CONST_ALLOC_PREAMBLE].size_vec4 * 4;
BITSET_DECLARE(promoted_to_float, preamble_size);
memset(promoted_to_float, 0, sizeof(promoted_to_float));

View file

@ -340,6 +340,7 @@ alloc_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,
if (!v->binning_pass) {
v->const_state = rzalloc_size(v, sizeof(*v->const_state));
v->const_state->allocs = shader->options.const_allocs;
v->const_state->push_consts_type = shader->options.push_consts_type;
v->const_state->consts_ubo.idx = -1;
v->const_state->driver_params_ubo.idx = -1;
@ -754,6 +755,31 @@ output_name(struct ir3_shader_variant *so, int i)
}
}
static const char *
ir3_const_alloc_type_to_string(enum ir3_const_alloc_type type)
{
switch (type) {
case IR3_CONST_ALLOC_PUSH_CONSTS:
return "push_consts";
case IR3_CONST_ALLOC_DYN_DESCRIPTOR_OFFSET:
return "dyn_descriptor_offset";
case IR3_CONST_ALLOC_INLINE_UNIFORM_ADDRS:
return "inline_uniform_addresses";
case IR3_CONST_ALLOC_DRIVER_PARAMS:
return "driver_params";
case IR3_CONST_ALLOC_UBO_RANGES:
return "ubo_ranges";
case IR3_CONST_ALLOC_PREAMBLE:
return "preamble";
case IR3_CONST_ALLOC_GLOBAL:
return "global";
case IR3_CONST_ALLOC_UBO_PTRS:
return "ubo_ptrs";
default:
return "unknown";
}
}
static void
dump_const_state(struct ir3_shader_variant *so, FILE *out)
{
@ -763,8 +789,16 @@ dump_const_state(struct ir3_shader_variant *so, FILE *out)
fprintf(out, "; num_ubos: %u\n", cs->num_ubos);
fprintf(out, "; num_driver_params: %u\n", cs->num_driver_params);
fprintf(out, "; offsets:\n");
if (cs->offsets.ubo != ~0)
fprintf(out, "; ubo: c%u.x\n", cs->offsets.ubo);
for (uint32_t i = 0; i < IR3_CONST_ALLOC_MAX; i++) {
if (cs->allocs.consts[i].size_vec4) {
fprintf(out, "; %-26s c%u.x (%u vec4)\n",
ir3_const_alloc_type_to_string(i),
cs->allocs.consts[i].offset_vec4,
cs->allocs.consts[i].size_vec4);
}
}
if (cs->offsets.image_dims != ~0)
fprintf(out, "; image_dims: c%u.x\n", cs->offsets.image_dims);
if (cs->offsets.kernel_params != ~0)

View file

@ -185,6 +185,53 @@ struct ir3_driver_ubo {
uint32_t size;
};
enum ir3_const_alloc_type {
/* Vulkan, push consts. */
IR3_CONST_ALLOC_PUSH_CONSTS = 0,
/* Vulkan, offsets required to calculate offsets of descriptors with dynamic
* offsets.
*/
IR3_CONST_ALLOC_DYN_DESCRIPTOR_OFFSET = 1,
/* Vulkan, addresses of inline uniform buffers, to which we fallback when
* their size is unknown.
*/
IR3_CONST_ALLOC_INLINE_UNIFORM_ADDRS = 2,
/* Common, stage-specific params uploaded by the driver/HW. */
IR3_CONST_ALLOC_DRIVER_PARAMS = 3,
/* Common, UBOs lowered to consts. */
IR3_CONST_ALLOC_UBO_RANGES = 4,
/* Common, consts produced by a preamble to be used in a main shader. */
IR3_CONST_ALLOC_PREAMBLE = 5,
/* Vulkan, inline uniforms loaded into consts in the preamble.*/
IR3_CONST_ALLOC_GLOBAL = 6,
/* OpenGL, pre-a6xx; pointers to UBOs */
IR3_CONST_ALLOC_UBO_PTRS = 7,
IR3_CONST_ALLOC_MAX = 8,
};
struct ir3_const_allocation {
uint32_t offset_vec4;
uint32_t size_vec4;
uint32_t reserved_size_vec4;
uint32_t reserved_align_vec4;
};
struct ir3_const_allocations {
struct ir3_const_allocation consts[IR3_CONST_ALLOC_MAX];
uint32_t max_const_offset_vec4;
uint32_t reserved_vec4;
};
static inline bool
ir3_const_can_upload(const struct ir3_const_allocations *const_alloc,
enum ir3_const_alloc_type type,
uint32_t shader_const_size_vec4)
{
return const_alloc->consts[type].size_vec4 > 0 &&
const_alloc->consts[type].offset_vec4 < shader_const_size_vec4;
}
/**
* Describes the layout of shader consts in the const register file.
*
@ -192,8 +239,7 @@ struct ir3_driver_ubo {
* that pointer size (ubo, etc) changes depending on generation.
*
* + user consts: only used for turnip push consts
* + lowered UBO ranges
* + preamble consts
* + Optional consts: ubo ranges, preamble, global, etc.
* + UBO addresses: turnip is bindless and these are wasted
* + image dimensions: a5xx only; needed to calculate pixel offset, but only
* for images that have image_{load,store,size,atomic*} intrinsics
@ -228,11 +274,8 @@ struct ir3_const_state {
*/
uint32_t required_consts_aligment_vec4;
int32_t constant_data_dynamic_offsets;
struct {
/* user const start at zero */
unsigned ubo;
/* Required consts, cannot negotiate their size */
unsigned image_dims;
unsigned kernel_params;
unsigned driver_param;
@ -242,6 +285,8 @@ struct ir3_const_state {
unsigned immediate;
} offsets;
struct ir3_const_allocations allocs;
struct {
uint32_t mask; /* bitmask of images that have image_store */
uint32_t count; /* number of consts allocated */
@ -257,9 +302,6 @@ struct ir3_const_state {
unsigned immediates_size;
uint32_t *immediates;
unsigned preamble_size;
unsigned global_size;
/* State of ubo access lowered to push consts: */
struct ir3_ubo_analysis_state ubo_state;
enum ir3_push_consts_type push_consts_type;
@ -555,7 +597,6 @@ struct ir3_shader_nir_options {
};
struct ir3_shader_options {
unsigned num_reserved_user_consts;
/* What API-visible wavesizes are allowed. Even if only double wavesize is
* allowed, we may still use the smaller wavesize "under the hood" and the
* application simply sees the upper half as always disabled.
@ -570,6 +611,9 @@ struct ir3_shader_options {
uint32_t push_consts_base;
uint32_t push_consts_dwords;
/* Some const allocations are required at API level. */
struct ir3_const_allocations const_allocs;
struct ir3_shader_nir_options nir_options;
};
@ -1042,6 +1086,16 @@ ir3_max_const(const struct ir3_shader_variant *v)
uint16_t ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm);
uint16_t ir3_const_add_imm(struct ir3_shader_variant *v, uint32_t imm);
static inline unsigned
ir3_const_reg(const struct ir3_const_state *const_state,
enum ir3_const_alloc_type type,
unsigned offset)
{
unsigned n = const_state->allocs.consts[type].offset_vec4;
assert(const_state->allocs.consts[type].size_vec4 != 0);
return regid(n + offset / 4, offset % 4);
}
/* Return true if a variant may need to be recompiled due to exceeding the
* maximum "safe" constlen.
*/
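
A small hedged example of the two lookup helpers added in this header;
the allocation type and the component offset are invented, and regid()
packs a vec4 index plus component as elsewhere in ir3:

/* Hypothetical consumer: reads driver param component #5 and only emits
 * it when the variant's constlen actually covers the allocation. */
static void
example_lookup(const struct ir3_shader_variant *v)
{
   const struct ir3_const_state *cs = ir3_const_state(v);

   if (ir3_const_can_upload(&cs->allocs, IR3_CONST_ALLOC_DRIVER_PARAMS,
                            v->constlen)) {
      /* offset is in components: 5 -> vec4 (base + 1), component .y */
      unsigned r = ir3_const_reg(cs, IR3_CONST_ALLOC_DRIVER_PARAMS, 5);
      (void)r;
   }
}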

View file

@ -799,10 +799,14 @@ compile_shader(struct tu_device *dev, struct nir_shader *nir,
nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, nir->info.stage);
struct ir3_const_allocations const_allocs = {};
if (consts > 0)
ir3_const_alloc(&const_allocs, IR3_CONST_ALLOC_UBO_RANGES, align(consts, 8), 1);
const struct ir3_shader_options options = {
.num_reserved_user_consts = align(consts, 8),
.api_wavesize = IR3_SINGLE_OR_DOUBLE,
.real_wavesize = IR3_SINGLE_OR_DOUBLE,
.const_allocs = const_allocs,
};
ir3_finalize_nir(dev->compiler, &options.nir_options, nir);

View file

@ -826,7 +826,7 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
const struct tu_pipeline_layout *layout,
uint32_t read_only_input_attachments,
bool dynamic_renderpass,
unsigned *reserved_consts_vec4_out)
struct ir3_const_allocations *const_allocs)
{
tu_shader->const_state.push_consts = (struct tu_push_constant_range) {
.lo = 0,
@ -848,10 +848,13 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
}
struct tu_const_state *const_state = &tu_shader->const_state;
unsigned reserved_consts_vec4 =
unsigned push_consts_vec4 =
align(DIV_ROUND_UP(const_state->push_consts.dwords, 4),
dev->compiler->const_upload_unit);
ir3_const_alloc(const_allocs, IR3_CONST_ALLOC_PUSH_CONSTS,
push_consts_vec4, 1);
bool unknown_dynamic_size = false;
bool unknown_dynamic_offset = false;
for (unsigned i = 0; i < layout->num_sets; i++) {
@ -867,9 +870,12 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
}
if (unknown_dynamic_offset) {
const_state->dynamic_offset_loc = reserved_consts_vec4 * 4;
const_state->dynamic_offset_loc =
const_allocs->max_const_offset_vec4 * 4;
assert(dev->physical_device->reserved_set_idx >= 0);
reserved_consts_vec4 += DIV_ROUND_UP(dev->physical_device->reserved_set_idx, 4);
ir3_const_alloc(
const_allocs, IR3_CONST_ALLOC_DYN_DESCRIPTOR_OFFSET,
DIV_ROUND_UP(dev->physical_device->reserved_set_idx, 4), 1);
} else {
const_state->dynamic_offset_loc = UINT32_MAX;
}
@ -877,6 +883,7 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
/* Reserve space for inline uniforms, so we can always load them from
* constants and not setup a UBO descriptor for them.
*/
size_t ldgk_consts = 0;
bool use_ldg_k =
dev->physical_device->info->a7xx.load_inline_uniforms_via_preamble_ldgk;
for (unsigned set = 0; set < layout->num_sets; set++) {
@ -918,20 +925,23 @@ tu_lower_io(nir_shader *shader, struct tu_device *dev,
assert(const_state->num_inline_ubos < ARRAY_SIZE(const_state->ubos));
unsigned size_vec4 = push_address ? 1 : DIV_ROUND_UP(binding->size, 16);
const_state->ubos[const_state->num_inline_ubos++] = (struct tu_inline_ubo) {
.base = set,
.offset = binding->offset,
.push_address = push_address,
.const_offset_vec4 = reserved_consts_vec4,
.size_vec4 = size_vec4,
};
const_state->ubos[const_state->num_inline_ubos++] =
(struct tu_inline_ubo) {
.base = set,
.offset = binding->offset,
.push_address = push_address,
.const_offset_vec4 =
const_allocs->max_const_offset_vec4 + ldgk_consts,
.size_vec4 = size_vec4,
};
if (!use_ldg_k)
reserved_consts_vec4 += align(size_vec4, dev->compiler->const_upload_unit);
if (!use_ldg_k) {
ldgk_consts += align(size_vec4, dev->compiler->const_upload_unit);
}
}
}
*reserved_consts_vec4_out = reserved_consts_vec4;
ir3_const_alloc(const_allocs, IR3_CONST_ALLOC_INLINE_UNIFORM_ADDRS, ldgk_consts, 1);
struct lower_instr_params params = {
.dev = dev,
@ -2527,10 +2537,10 @@ tu_shader_create(struct tu_device *dev,
NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes, &options);
}
unsigned reserved_consts_vec4 = 0;
struct ir3_const_allocations const_allocs = {};
NIR_PASS_V(nir, tu_lower_io, dev, shader, layout,
key->read_only_input_attachments, key->dynamic_renderpass,
&reserved_consts_vec4);
&const_allocs);
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
@ -2540,12 +2550,12 @@ tu_shader_create(struct tu_device *dev,
ir3_finalize_nir(dev->compiler, &nir_options, nir);
const struct ir3_shader_options options = {
.num_reserved_user_consts = reserved_consts_vec4,
.api_wavesize = key->api_wavesize,
.real_wavesize = key->real_wavesize,
.push_consts_type = shader->const_state.push_consts.type,
.push_consts_base = shader->const_state.push_consts.lo,
.push_consts_dwords = shader->const_state.push_consts.dwords,
.const_allocs = const_allocs,
.nir_options = nir_options,
};
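
On the driver side, API-level allocations now travel through
ir3_shader_options::const_allocs instead of a bare num_reserved_user_consts.
A condensed, hypothetical version of the setup done above (the real code
walks descriptor set layouts, dynamic offsets, inline UBOs, etc.; the sizes
here are invented):

/* Hypothetical driver-side setup; field and helper names come from this
 * commit, sizes are made up. */
static struct ir3_shader_options
example_shader_options(void)
{
   struct ir3_const_allocations const_allocs = {0};

   /* Vulkan push constants. */
   ir3_const_alloc(&const_allocs, IR3_CONST_ALLOC_PUSH_CONSTS, 4 /* vec4 */, 1);

   /* Offsets for descriptors with dynamic offsets, if any were found. */
   ir3_const_alloc(&const_allocs, IR3_CONST_ALLOC_DYN_DESCRIPTOR_OFFSET, 2, 1);

   struct ir3_shader_options options = {
      .api_wavesize = IR3_SINGLE_OR_DOUBLE,
      .real_wavesize = IR3_SINGLE_OR_DOUBLE,
      .const_allocs = const_allocs,
   };
   return options;
}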

View file

@ -188,7 +188,8 @@ ir3_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
{
const struct ir3_const_state *const_state = ir3_const_state(v);
uint32_t offset = const_state->offsets.ubo;
uint32_t offset =
const_state->allocs.consts[IR3_CONST_ALLOC_UBO_PTRS].offset_vec4;
/* a6xx+ uses UBO state and ldc instead of pointers emitted in
* const state and ldg:
@ -196,7 +197,8 @@ ir3_emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
if (ctx->screen->gen >= 6)
return;
if (v->constlen > offset) {
if (ir3_const_can_upload(&const_state->allocs, IR3_CONST_ALLOC_UBO_PTRS,
v->constlen)) {
uint32_t params = const_state->num_ubos;
uint32_t offsets[params];
struct fd_bo *bos[params];