ir3: split immediate state from rest of const state

On a7xx, the immediates that get promoted to const registers will be
initialized in the preamble instead of being part of the const state. So
technically, we won't need the immediate state that is part of the const
state anymore on a7xx. However, it is still a convenient place for
ir3_cp to store the immediates that should be promoted to const
registers before they are lowered to the preamble.

This causes one issue: the binning pass isn't allowed to modify the
const state while it's perfectly fine for it to use different immediates
compared to the non-binning pass on a7xx. Even pre-a7xx this is fine as
long as the size of the immediate buffer is the same.

To allow the binning pass to modify its immediate state while keeping
its const state immutable, this commit moves the fields related to
immediates into a new struct. Runtime checks are added to enforce that
the size of the immediate buffer is the same for the binning and
non-binning variant pre-a7xx.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32454>
This commit is contained in:
Job Noorman 2025-03-03 09:58:54 +01:00 committed by Marge Bot
parent f9fc0fc8fd
commit 68ab25e6d4
9 changed files with 130 additions and 63 deletions

View file

@ -208,7 +208,8 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel,
const struct ir3_const_state *const_state = ir3_const_state(v); const struct ir3_const_state *const_state = ir3_const_state(v);
uint32_t base = const_state->allocs.max_const_offset_vec4; uint32_t base = const_state->allocs.max_const_offset_vec4;
int size = DIV_ROUND_UP(const_state->immediates_count, 4); const struct ir3_imm_const_state *imm_state = &v->imm_state;
int size = DIV_ROUND_UP(imm_state->count, 4);
/* truncate size to avoid writing constants that shader /* truncate size to avoid writing constants that shader
* does not use: * does not use:
@ -220,7 +221,7 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel,
size *= 4; size *= 4;
if (size > 0) { if (size > 0) {
emit_const(ring, kernel, base, size, const_state->immediates); emit_const(ring, kernel, base, size, imm_state->values);
} }
} }

View file

@ -317,14 +317,15 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel,
const struct ir3_const_state *const_state = ir3_const_state(v); const struct ir3_const_state *const_state = ir3_const_state(v);
uint32_t base = const_state->allocs.max_const_offset_vec4; uint32_t base = const_state->allocs.max_const_offset_vec4;
int size = DIV_ROUND_UP(const_state->immediates_count, 4); const struct ir3_imm_const_state *imm_state = &v->imm_state;
int size = DIV_ROUND_UP(imm_state->count, 4);
if (ir3_kernel->info.numwg != INVALID_REG) { if (ir3_kernel->info.numwg != INVALID_REG) {
assert((ir3_kernel->info.numwg & 0x3) == 0); assert((ir3_kernel->info.numwg & 0x3) == 0);
int idx = ir3_kernel->info.numwg >> 2; int idx = ir3_kernel->info.numwg >> 2;
const_state->immediates[idx * 4 + 0] = grid[0]; imm_state->values[idx * 4 + 0] = grid[0];
const_state->immediates[idx * 4 + 1] = grid[1]; imm_state->values[idx * 4 + 1] = grid[1];
const_state->immediates[idx * 4 + 2] = grid[2]; imm_state->values[idx * 4 + 2] = grid[2];
} }
for (int i = 0; i < MAX_BUFS; i++) { for (int i = 0; i < MAX_BUFS; i++) {
@ -334,8 +335,8 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel,
uint64_t iova = fd_bo_get_iova(kernel->bufs[i]); uint64_t iova = fd_bo_get_iova(kernel->bufs[i]);
const_state->immediates[idx * 4 + 1] = iova >> 32; imm_state->values[idx * 4 + 1] = iova >> 32;
const_state->immediates[idx * 4 + 0] = (iova << 32) >> 32; imm_state->values[idx * 4 + 0] = (iova << 32) >> 32;
} }
} }
@ -349,7 +350,7 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel,
size *= 4; size *= 4;
if (size > 0) { if (size > 0) {
emit_const<CHIP>(ring, base, size, const_state->immediates); emit_const<CHIP>(ring, base, size, imm_state->values);
} }
} }

View file

@ -5754,6 +5754,18 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
so->need_full_quad = true; so->need_full_quad = true;
} }
/* If we're uploading immediates as part of the const state, we need to make
* sure the binning and non-binning variants have the same size. Pre-allocate
* for the binning variant, ir3_const_add_imm will ensure we don't add more
* immediates than allowed.
*/
if (so->binning_pass && !compiler->load_shader_consts_via_preamble &&
so->nonbinning->imm_state.size) {
ASSERTED bool success =
ir3_const_ensure_imm_size(so, so->nonbinning->imm_state.size);
assert(success);
}
ir3_debug_print(ir, "AFTER: nir->ir3"); ir3_debug_print(ir, "AFTER: nir->ir3");
ir3_validate(ir); ir3_validate(ir);

View file

@ -117,10 +117,14 @@ retrieve_variant(struct blob_reader *blob, struct ir3_shader_variant *v)
if (!v->binning_pass) { if (!v->binning_pass) {
blob_copy_bytes(blob, v->const_state, sizeof(*v->const_state)); blob_copy_bytes(blob, v->const_state, sizeof(*v->const_state));
unsigned immeds_sz = v->const_state->immediates_size * }
sizeof(v->const_state->immediates[0]);
v->const_state->immediates = ralloc_size(v->const_state, immeds_sz); if (!v->compiler->load_shader_consts_via_preamble) {
blob_copy_bytes(blob, v->const_state->immediates, immeds_sz); v->imm_state.size = blob_read_uint32(blob);
v->imm_state.count = v->imm_state.size;
uint32_t immeds_sz = v->imm_state.size * sizeof(v->imm_state.values[0]);
v->imm_state.values = ralloc_size(v, immeds_sz);
blob_copy_bytes(blob, v->imm_state.values, immeds_sz);
} }
} }
@ -139,9 +143,15 @@ store_variant(struct blob *blob, const struct ir3_shader_variant *v)
if (!v->binning_pass) { if (!v->binning_pass) {
blob_write_bytes(blob, v->const_state, sizeof(*v->const_state)); blob_write_bytes(blob, v->const_state, sizeof(*v->const_state));
unsigned immeds_sz = v->const_state->immediates_size * }
sizeof(v->const_state->immediates[0]);
blob_write_bytes(blob, v->const_state->immediates, immeds_sz); /* When load_shader_consts_via_preamble, immediates are loaded in the
* preamble and hence part of bin.
*/
if (!v->compiler->load_shader_consts_via_preamble) {
blob_write_uint32(blob, v->imm_state.size);
uint32_t immeds_sz = v->imm_state.size * sizeof(v->imm_state.values[0]);
blob_write_bytes(blob, v->imm_state.values, immeds_sz);
} }
} }

View file

@ -243,23 +243,23 @@ static void fixup_cat5_s2en(void)
static void add_const(unsigned reg, unsigned c0, unsigned c1, unsigned c2, unsigned c3) static void add_const(unsigned reg, unsigned c0, unsigned c1, unsigned c2, unsigned c3)
{ {
struct ir3_const_state *const_state = ir3_const_state_mut(variant); struct ir3_imm_const_state *imm_state = &variant->imm_state;
assert((reg & 0x7) == 0); assert((reg & 0x7) == 0);
int idx = reg >> (1 + 2); /* low bit is half vs full, next two bits are swiz */ int idx = reg >> (1 + 2); /* low bit is half vs full, next two bits are swiz */
if (idx * 4 + 4 > const_state->immediates_size) { if (idx * 4 + 4 > imm_state->size) {
const_state->immediates = rerzalloc(const_state, imm_state->values = rerzalloc(imm_state,
const_state->immediates, imm_state->values,
__typeof__(const_state->immediates[0]), __typeof__(imm_state->values[0]),
const_state->immediates_size, imm_state->size,
idx * 4 + 4); idx * 4 + 4);
for (unsigned i = const_state->immediates_size; i < idx * 4; i++) for (unsigned i = imm_state->size; i < idx * 4; i++)
const_state->immediates[i] = 0xd0d0d0d0; imm_state->values[i] = 0xd0d0d0d0;
const_state->immediates_size = const_state->immediates_count = idx * 4 + 4; imm_state->size = imm_state->count = idx * 4 + 4;
} }
const_state->immediates[idx * 4 + 0] = c0; imm_state->values[idx * 4 + 0] = c0;
const_state->immediates[idx * 4 + 1] = c1; imm_state->values[idx * 4 + 1] = c1;
const_state->immediates[idx * 4 + 2] = c2; imm_state->values[idx * 4 + 2] = c2;
const_state->immediates[idx * 4 + 3] = c3; imm_state->values[idx * 4 + 3] = c3;
} }
static void add_buf_init_val(uint32_t val) static void add_buf_init_val(uint32_t val)

View file

@ -25,6 +25,45 @@
#include "disasm.h" #include "disasm.h"
/**
 * Make sure the immediate buffer of variant v can hold at least size dwords.
 *
 * The requested size is rounded up to a multiple of 4.  Newly available
 * slots past imm_state.count are filled with the 0xd0d0d0d0 dummy value.
 *
 * Returns true if the buffer is (now) large enough.  Returns false, leaving
 * the immediate state untouched, if growing is not allowed for a binning
 * variant or if the allocation did not succeed.
 */
bool
ir3_const_ensure_imm_size(struct ir3_shader_variant *v, unsigned size)
{
   struct ir3_imm_const_state *imm_state = &v->imm_state;

   if (size <= imm_state->size) {
      return true;
   }

   /* Immediates are uploaded in units of vec4 so make sure our buffer is large
    * enough.
    */
   size = ALIGN(size, 4);

   /* Pre-a7xx, the immediates that get lowered to const registers are
    * emitted as part of the const state so the total size of immediates
    * should be the same for the binning and non-binning variants. Make sure
    * we don't increase the size beyond that of the non-binning variant.
    */
   if (v->binning_pass && !v->compiler->load_shader_consts_via_preamble &&
       size > v->nonbinning->imm_state.size) {
      return false;
   }

   /* Only publish the new pointer and size once the allocation succeeded, so
    * a failure is reported to the caller instead of being dereferenced below.
    */
   uint32_t *values =
      rerzalloc(v, imm_state->values, __typeof__(imm_state->values[0]),
                imm_state->size, size);
   if (!values) {
      return false;
   }

   imm_state->values = values;
   imm_state->size = size;

   /* Note that ir3 printing relies on having groups of 4 dwords, so we fill the
    * unused slots with a dummy value.
    */
   for (unsigned i = imm_state->count; i < imm_state->size; i++) {
      imm_state->values[i] = 0xd0d0d0d0;
   }

   return true;
}
static uint16_t static uint16_t
const_imm_index_to_reg(const struct ir3_const_state *const_state, unsigned i) const_imm_index_to_reg(const struct ir3_const_state *const_state, unsigned i)
{ {
@ -35,9 +74,10 @@ uint16_t
ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm) ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm)
{ {
const struct ir3_const_state *const_state = ir3_const_state(v); const struct ir3_const_state *const_state = ir3_const_state(v);
const struct ir3_imm_const_state *imm_state = &v->imm_state;
for (unsigned i = 0; i < const_state->immediates_count; i++) { for (unsigned i = 0; i < imm_state->count; i++) {
if (const_state->immediates[i] == imm) if (imm_state->values[i] == imm)
return const_imm_index_to_reg(const_state, i); return const_imm_index_to_reg(const_state, i);
} }
@ -47,36 +87,26 @@ ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm)
uint16_t uint16_t
ir3_const_add_imm(struct ir3_shader_variant *v, uint32_t imm) ir3_const_add_imm(struct ir3_shader_variant *v, uint32_t imm)
{ {
struct ir3_const_state *const_state = ir3_const_state_mut(v); const struct ir3_const_state *const_state = ir3_const_state(v);
struct ir3_imm_const_state *imm_state = &v->imm_state;
/* Reallocate for 4 more elements whenever it's necessary. Note that ir3 /* Reallocate for 4 more elements whenever it's necessary. */
* printing relies on having groups of 4 dwords, so we fill the unused if (imm_state->count == imm_state->size) {
* slots with a dummy value. if (!ir3_const_ensure_imm_size(v, imm_state->size + 4)) {
*/ return INVALID_CONST_REG;
if (const_state->immediates_count == const_state->immediates_size) {
const_state->immediates = rerzalloc(
const_state, const_state->immediates,
__typeof__(const_state->immediates[0]), const_state->immediates_size,
const_state->immediates_size + 4);
const_state->immediates_size += 4;
for (int i = const_state->immediates_count;
i < const_state->immediates_size; i++) {
const_state->immediates[i] = 0xd0d0d0d0;
} }
} }
/* Add on a new immediate to be pushed, if we have space left in the /* Add on a new immediate to be pushed, if we have space left in the
* constbuf. * constbuf.
*/ */
if (const_state->allocs.max_const_offset_vec4 + if (const_state->allocs.max_const_offset_vec4 + imm_state->count / 4 >=
const_state->immediates_count / 4 >=
ir3_max_const(v)) { ir3_max_const(v)) {
return INVALID_CONST_REG; return INVALID_CONST_REG;
} }
const_state->immediates[const_state->immediates_count] = imm; imm_state->values[imm_state->count] = imm;
return const_imm_index_to_reg(const_state, const_state->immediates_count++); return const_imm_index_to_reg(const_state, imm_state->count++);
} }
int int
@ -912,14 +942,15 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
} }
const struct ir3_const_state *const_state = ir3_const_state(so); const struct ir3_const_state *const_state = ir3_const_state(so);
for (i = 0; i < DIV_ROUND_UP(const_state->immediates_count, 4); i++) { const struct ir3_imm_const_state *imm_state = &so->imm_state;
for (i = 0; i < DIV_ROUND_UP(imm_state->count, 4); i++) {
fprintf(out, "@const(c%d.x)\t", fprintf(out, "@const(c%d.x)\t",
const_state->allocs.max_const_offset_vec4 + i); const_state->allocs.max_const_offset_vec4 + i);
fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n", fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
const_state->immediates[i * 4 + 0], imm_state->values[i * 4 + 0],
const_state->immediates[i * 4 + 1], imm_state->values[i * 4 + 1],
const_state->immediates[i * 4 + 2], imm_state->values[i * 4 + 2],
const_state->immediates[i * 4 + 3]); imm_state->values[i * 4 + 3]);
} }
ir3_isa_disasm(bin, so->info.sizedwords * 4, out, ir3_isa_disasm(bin, so->info.sizedwords * 4, out,

View file

@ -260,6 +260,12 @@ struct ir3_const_image_dims {
uint32_t off[IR3_MAX_SHADER_IMAGES]; uint32_t off[IR3_MAX_SHADER_IMAGES];
}; };
/**
 * Growable buffer of immediate values that will be lowered to const
 * registers.
 */
struct ir3_imm_const_state {
/* Number of dwords allocated for values. */
unsigned size;
/* Number of dwords of values in use; new immediates are appended at this
 * index.
 */
unsigned count;
/* The immediate values themselves; slots in [count, size) hold the
 * 0xd0d0d0d0 dummy value.
 */
uint32_t *values;
};
/** /**
* Describes the layout of shader consts in the const register file * Describes the layout of shader consts in the const register file
* and additional info about individual allocations. * and additional info about individual allocations.
@ -289,10 +295,6 @@ struct ir3_const_state {
struct ir3_const_image_dims image_dims; struct ir3_const_image_dims image_dims;
unsigned immediates_count;
unsigned immediates_size;
uint32_t *immediates;
/* State of ubo access lowered to push consts: */ /* State of ubo access lowered to push consts: */
struct ir3_ubo_analysis_state ubo_state; struct ir3_ubo_analysis_state ubo_state;
enum ir3_push_consts_type push_consts_type; enum ir3_push_consts_type push_consts_type;
@ -674,6 +676,13 @@ struct ir3_shader_variant {
struct ir3_const_state *const_state; struct ir3_const_state *const_state;
/* Immediate values that will be lowered to const registers. Before a7xx,
* this will be uploaded together with the const_state. From a7xx on (where
* load_shader_consts_via_preamble is true), this will be lowered to const
* stores in the preamble.
*/
struct ir3_imm_const_state imm_state;
/* /*
* The following macros are used by the shader disk cache save/ * The following macros are used by the shader disk cache save/
* restore paths to serialize/deserialize the variant. Any * restore paths to serialize/deserialize the variant. Any
@ -1083,6 +1092,7 @@ ir3_max_const(const struct ir3_shader_variant *v)
return _ir3_max_const(v, v->key.safe_constlen); return _ir3_max_const(v, v->key.safe_constlen);
} }
bool ir3_const_ensure_imm_size(struct ir3_shader_variant *v, unsigned size);
uint16_t ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm); uint16_t ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm);
uint16_t ir3_const_add_imm(struct ir3_shader_variant *v, uint32_t imm); uint16_t ir3_const_add_imm(struct ir3_shader_variant *v, uint32_t imm);

View file

@ -1157,7 +1157,8 @@ tu_xs_get_immediates_packet_size_dwords(const struct ir3_shader_variant *xs)
{ {
const struct ir3_const_state *const_state = ir3_const_state(xs); const struct ir3_const_state *const_state = ir3_const_state(xs);
uint32_t base = const_state->allocs.max_const_offset_vec4; uint32_t base = const_state->allocs.max_const_offset_vec4;
int32_t size = DIV_ROUND_UP(const_state->immediates_count, 4); const struct ir3_imm_const_state *imm_state = &xs->imm_state;
int32_t size = DIV_ROUND_UP(imm_state->count, 4);
/* truncate size to avoid writing constants that shader /* truncate size to avoid writing constants that shader
* does not use: * does not use:
@ -1365,6 +1366,7 @@ tu6_emit_xs(struct tu_cs *cs,
const struct ir3_const_state *const_state = ir3_const_state(xs); const struct ir3_const_state *const_state = ir3_const_state(xs);
uint32_t base = const_state->allocs.max_const_offset_vec4; uint32_t base = const_state->allocs.max_const_offset_vec4;
const struct ir3_imm_const_state *imm_state = &xs->imm_state;
unsigned immediate_size = tu_xs_get_immediates_packet_size_dwords(xs); unsigned immediate_size = tu_xs_get_immediates_packet_size_dwords(xs);
if (immediate_size > 0) { if (immediate_size > 0) {
@ -1378,7 +1380,7 @@ tu6_emit_xs(struct tu_cs *cs,
tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
tu_cs_emit_array(cs, const_state->immediates, immediate_size); tu_cs_emit_array(cs, imm_state->values, immediate_size);
} }
if (const_state->consts_ubo.idx != -1) { if (const_state->consts_ubo.idx != -1) {

View file

@ -300,7 +300,7 @@ ir3_emit_immediates(const struct ir3_shader_variant *v,
{ {
const struct ir3_const_state *const_state = ir3_const_state(v); const struct ir3_const_state *const_state = ir3_const_state(v);
uint32_t base = const_state->allocs.max_const_offset_vec4; uint32_t base = const_state->allocs.max_const_offset_vec4;
int size = DIV_ROUND_UP(const_state->immediates_count, 4); int size = DIV_ROUND_UP(v->imm_state.count, 4);
/* truncate size to avoid writing constants that shader /* truncate size to avoid writing constants that shader
* does not use: * does not use:
@ -312,7 +312,7 @@ ir3_emit_immediates(const struct ir3_shader_variant *v,
size *= 4; size *= 4;
if (size > 0) if (size > 0)
emit_const_user(ring, v, base, size, const_state->immediates); emit_const_user(ring, v, base, size, v->imm_state.values);
/* NIR constant data has the same lifetime as immediates, so upload it /* NIR constant data has the same lifetime as immediates, so upload it
* now, too. * now, too.