mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 19:50:11 +01:00
ir3: split immediate state from rest of const state
On a7xx, the immediates that get promoted to const registers will be initialized in the preamble instead of being part of the const state. So technically, we won't need the immediate state that is part of the const state anymore on a7xx. However, it is still a convenient place for ir3_cp to store the immediates that should be promoted to const registers before they are lowered to the preamble. This causes one issue: the binning pass isn't allowed to modify the const state while it's perfectly fine for it to use different immediates compared to the non-binning pass on a7xx. Even pre-a7xx this is fine as long as the size of the immediate buffer is the same. To allow the binning pass to modify its immediate state while keeping its const state immutable, this commit moves the fields related to immediates into a new struct. Runtime checks are added to enforce that the size of the immediate buffer is the same for the binning and non-binning variant pre-a7xx. Signed-off-by: Job Noorman <jnoorman@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32454>
This commit is contained in:
parent
f9fc0fc8fd
commit
68ab25e6d4
9 changed files with 130 additions and 63 deletions
|
|
@ -208,7 +208,8 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel,
|
|||
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
uint32_t base = const_state->allocs.max_const_offset_vec4;
|
||||
int size = DIV_ROUND_UP(const_state->immediates_count, 4);
|
||||
const struct ir3_imm_const_state *imm_state = &v->imm_state;
|
||||
int size = DIV_ROUND_UP(imm_state->count, 4);
|
||||
|
||||
/* truncate size to avoid writing constants that shader
|
||||
* does not use:
|
||||
|
|
@ -220,7 +221,7 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel,
|
|||
size *= 4;
|
||||
|
||||
if (size > 0) {
|
||||
emit_const(ring, kernel, base, size, const_state->immediates);
|
||||
emit_const(ring, kernel, base, size, imm_state->values);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -317,14 +317,15 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel,
|
|||
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
uint32_t base = const_state->allocs.max_const_offset_vec4;
|
||||
int size = DIV_ROUND_UP(const_state->immediates_count, 4);
|
||||
const struct ir3_imm_const_state *imm_state = &v->imm_state;
|
||||
int size = DIV_ROUND_UP(imm_state->count, 4);
|
||||
|
||||
if (ir3_kernel->info.numwg != INVALID_REG) {
|
||||
assert((ir3_kernel->info.numwg & 0x3) == 0);
|
||||
int idx = ir3_kernel->info.numwg >> 2;
|
||||
const_state->immediates[idx * 4 + 0] = grid[0];
|
||||
const_state->immediates[idx * 4 + 1] = grid[1];
|
||||
const_state->immediates[idx * 4 + 2] = grid[2];
|
||||
imm_state->values[idx * 4 + 0] = grid[0];
|
||||
imm_state->values[idx * 4 + 1] = grid[1];
|
||||
imm_state->values[idx * 4 + 2] = grid[2];
|
||||
}
|
||||
|
||||
for (int i = 0; i < MAX_BUFS; i++) {
|
||||
|
|
@ -334,8 +335,8 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel,
|
|||
|
||||
uint64_t iova = fd_bo_get_iova(kernel->bufs[i]);
|
||||
|
||||
const_state->immediates[idx * 4 + 1] = iova >> 32;
|
||||
const_state->immediates[idx * 4 + 0] = (iova << 32) >> 32;
|
||||
imm_state->values[idx * 4 + 1] = iova >> 32;
|
||||
imm_state->values[idx * 4 + 0] = (iova << 32) >> 32;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -349,7 +350,7 @@ cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel,
|
|||
size *= 4;
|
||||
|
||||
if (size > 0) {
|
||||
emit_const<CHIP>(ring, base, size, const_state->immediates);
|
||||
emit_const<CHIP>(ring, base, size, imm_state->values);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5754,6 +5754,18 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
|
|||
so->need_full_quad = true;
|
||||
}
|
||||
|
||||
/* If we're uploading immediates as part of the const state, we need to make
|
||||
* sure the binning and non-binning variants have the same size. Pre-allocate
|
||||
* for the binning variant, ir3_const_add_imm will ensure we don't add more
|
||||
* immediates than allowed.
|
||||
*/
|
||||
if (so->binning_pass && !compiler->load_shader_consts_via_preamble &&
|
||||
so->nonbinning->imm_state.size) {
|
||||
ASSERTED bool success =
|
||||
ir3_const_ensure_imm_size(so, so->nonbinning->imm_state.size);
|
||||
assert(success);
|
||||
}
|
||||
|
||||
ir3_debug_print(ir, "AFTER: nir->ir3");
|
||||
ir3_validate(ir);
|
||||
|
||||
|
|
|
|||
|
|
@ -117,10 +117,14 @@ retrieve_variant(struct blob_reader *blob, struct ir3_shader_variant *v)
|
|||
|
||||
if (!v->binning_pass) {
|
||||
blob_copy_bytes(blob, v->const_state, sizeof(*v->const_state));
|
||||
unsigned immeds_sz = v->const_state->immediates_size *
|
||||
sizeof(v->const_state->immediates[0]);
|
||||
v->const_state->immediates = ralloc_size(v->const_state, immeds_sz);
|
||||
blob_copy_bytes(blob, v->const_state->immediates, immeds_sz);
|
||||
}
|
||||
|
||||
if (!v->compiler->load_shader_consts_via_preamble) {
|
||||
v->imm_state.size = blob_read_uint32(blob);
|
||||
v->imm_state.count = v->imm_state.size;
|
||||
uint32_t immeds_sz = v->imm_state.size * sizeof(v->imm_state.values[0]);
|
||||
v->imm_state.values = ralloc_size(v, immeds_sz);
|
||||
blob_copy_bytes(blob, v->imm_state.values, immeds_sz);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -139,9 +143,15 @@ store_variant(struct blob *blob, const struct ir3_shader_variant *v)
|
|||
|
||||
if (!v->binning_pass) {
|
||||
blob_write_bytes(blob, v->const_state, sizeof(*v->const_state));
|
||||
unsigned immeds_sz = v->const_state->immediates_size *
|
||||
sizeof(v->const_state->immediates[0]);
|
||||
blob_write_bytes(blob, v->const_state->immediates, immeds_sz);
|
||||
}
|
||||
|
||||
/* When load_shader_consts_via_preamble, immediates are loaded in the
|
||||
* preamble and hence part of bin.
|
||||
*/
|
||||
if (!v->compiler->load_shader_consts_via_preamble) {
|
||||
blob_write_uint32(blob, v->imm_state.size);
|
||||
uint32_t immeds_sz = v->imm_state.size * sizeof(v->imm_state.values[0]);
|
||||
blob_write_bytes(blob, v->imm_state.values, immeds_sz);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -243,23 +243,23 @@ static void fixup_cat5_s2en(void)
|
|||
|
||||
static void add_const(unsigned reg, unsigned c0, unsigned c1, unsigned c2, unsigned c3)
|
||||
{
|
||||
struct ir3_const_state *const_state = ir3_const_state_mut(variant);
|
||||
struct ir3_imm_const_state *imm_state = &variant->imm_state;
|
||||
assert((reg & 0x7) == 0);
|
||||
int idx = reg >> (1 + 2); /* low bit is half vs full, next two bits are swiz */
|
||||
if (idx * 4 + 4 > const_state->immediates_size) {
|
||||
const_state->immediates = rerzalloc(const_state,
|
||||
const_state->immediates,
|
||||
__typeof__(const_state->immediates[0]),
|
||||
const_state->immediates_size,
|
||||
if (idx * 4 + 4 > imm_state->size) {
|
||||
imm_state->values = rerzalloc(imm_state,
|
||||
imm_state->values,
|
||||
__typeof__(imm_state->values[0]),
|
||||
imm_state->size,
|
||||
idx * 4 + 4);
|
||||
for (unsigned i = const_state->immediates_size; i < idx * 4; i++)
|
||||
const_state->immediates[i] = 0xd0d0d0d0;
|
||||
const_state->immediates_size = const_state->immediates_count = idx * 4 + 4;
|
||||
for (unsigned i = imm_state->size; i < idx * 4; i++)
|
||||
imm_state->values[i] = 0xd0d0d0d0;
|
||||
imm_state->size = imm_state->count = idx * 4 + 4;
|
||||
}
|
||||
const_state->immediates[idx * 4 + 0] = c0;
|
||||
const_state->immediates[idx * 4 + 1] = c1;
|
||||
const_state->immediates[idx * 4 + 2] = c2;
|
||||
const_state->immediates[idx * 4 + 3] = c3;
|
||||
imm_state->values[idx * 4 + 0] = c0;
|
||||
imm_state->values[idx * 4 + 1] = c1;
|
||||
imm_state->values[idx * 4 + 2] = c2;
|
||||
imm_state->values[idx * 4 + 3] = c3;
|
||||
}
|
||||
|
||||
static void add_buf_init_val(uint32_t val)
|
||||
|
|
|
|||
|
|
@ -25,6 +25,45 @@
|
|||
|
||||
#include "disasm.h"
|
||||
|
||||
bool
|
||||
ir3_const_ensure_imm_size(struct ir3_shader_variant *v, unsigned size)
|
||||
{
|
||||
struct ir3_imm_const_state *imm_state = &v->imm_state;
|
||||
|
||||
if (size <= imm_state->size) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Immediates are uploaded in units of vec4 so make sure our buffer is large
|
||||
* enough.
|
||||
*/
|
||||
size = ALIGN(size, 4);
|
||||
|
||||
/* Pre-a7xx, the immediates that get lowered to const registers are
|
||||
* emitted as part of the const state so the total size of immediates
|
||||
* should be the same for the binning and non-binning variants. Make sure
|
||||
* we don't increase the size beyond that of the non-binning variant.
|
||||
*/
|
||||
if (v->binning_pass && !v->compiler->load_shader_consts_via_preamble &&
|
||||
size > v->nonbinning->imm_state.size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
imm_state->values =
|
||||
rerzalloc(v, imm_state->values, __typeof__(imm_state->values[0]),
|
||||
imm_state->size, size);
|
||||
imm_state->size = size;
|
||||
|
||||
/* Note that ir3 printing relies on having groups of 4 dwords, so we fill the
|
||||
* unused slots with a dummy value.
|
||||
*/
|
||||
for (int i = imm_state->count; i < imm_state->size; i++) {
|
||||
imm_state->values[i] = 0xd0d0d0d0;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static uint16_t
|
||||
const_imm_index_to_reg(const struct ir3_const_state *const_state, unsigned i)
|
||||
{
|
||||
|
|
@ -35,9 +74,10 @@ uint16_t
|
|||
ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm)
|
||||
{
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
const struct ir3_imm_const_state *imm_state = &v->imm_state;
|
||||
|
||||
for (unsigned i = 0; i < const_state->immediates_count; i++) {
|
||||
if (const_state->immediates[i] == imm)
|
||||
for (unsigned i = 0; i < imm_state->count; i++) {
|
||||
if (imm_state->values[i] == imm)
|
||||
return const_imm_index_to_reg(const_state, i);
|
||||
}
|
||||
|
||||
|
|
@ -47,36 +87,26 @@ ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm)
|
|||
uint16_t
|
||||
ir3_const_add_imm(struct ir3_shader_variant *v, uint32_t imm)
|
||||
{
|
||||
struct ir3_const_state *const_state = ir3_const_state_mut(v);
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
struct ir3_imm_const_state *imm_state = &v->imm_state;
|
||||
|
||||
/* Reallocate for 4 more elements whenever it's necessary. Note that ir3
|
||||
* printing relies on having groups of 4 dwords, so we fill the unused
|
||||
* slots with a dummy value.
|
||||
*/
|
||||
if (const_state->immediates_count == const_state->immediates_size) {
|
||||
const_state->immediates = rerzalloc(
|
||||
const_state, const_state->immediates,
|
||||
__typeof__(const_state->immediates[0]), const_state->immediates_size,
|
||||
const_state->immediates_size + 4);
|
||||
const_state->immediates_size += 4;
|
||||
|
||||
for (int i = const_state->immediates_count;
|
||||
i < const_state->immediates_size; i++) {
|
||||
const_state->immediates[i] = 0xd0d0d0d0;
|
||||
/* Reallocate for 4 more elements whenever it's necessary. */
|
||||
if (imm_state->count == imm_state->size) {
|
||||
if (!ir3_const_ensure_imm_size(v, imm_state->size + 4)) {
|
||||
return INVALID_CONST_REG;
|
||||
}
|
||||
}
|
||||
|
||||
/* Add on a new immediate to be pushed, if we have space left in the
|
||||
* constbuf.
|
||||
*/
|
||||
if (const_state->allocs.max_const_offset_vec4 +
|
||||
const_state->immediates_count / 4 >=
|
||||
if (const_state->allocs.max_const_offset_vec4 + imm_state->count / 4 >=
|
||||
ir3_max_const(v)) {
|
||||
return INVALID_CONST_REG;
|
||||
}
|
||||
|
||||
const_state->immediates[const_state->immediates_count] = imm;
|
||||
return const_imm_index_to_reg(const_state, const_state->immediates_count++);
|
||||
imm_state->values[imm_state->count] = imm;
|
||||
return const_imm_index_to_reg(const_state, imm_state->count++);
|
||||
}
|
||||
|
||||
int
|
||||
|
|
@ -912,14 +942,15 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
|
|||
}
|
||||
|
||||
const struct ir3_const_state *const_state = ir3_const_state(so);
|
||||
for (i = 0; i < DIV_ROUND_UP(const_state->immediates_count, 4); i++) {
|
||||
const struct ir3_imm_const_state *imm_state = &so->imm_state;
|
||||
for (i = 0; i < DIV_ROUND_UP(imm_state->count, 4); i++) {
|
||||
fprintf(out, "@const(c%d.x)\t",
|
||||
const_state->allocs.max_const_offset_vec4 + i);
|
||||
fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
|
||||
const_state->immediates[i * 4 + 0],
|
||||
const_state->immediates[i * 4 + 1],
|
||||
const_state->immediates[i * 4 + 2],
|
||||
const_state->immediates[i * 4 + 3]);
|
||||
imm_state->values[i * 4 + 0],
|
||||
imm_state->values[i * 4 + 1],
|
||||
imm_state->values[i * 4 + 2],
|
||||
imm_state->values[i * 4 + 3]);
|
||||
}
|
||||
|
||||
ir3_isa_disasm(bin, so->info.sizedwords * 4, out,
|
||||
|
|
|
|||
|
|
@ -260,6 +260,12 @@ struct ir3_const_image_dims {
|
|||
uint32_t off[IR3_MAX_SHADER_IMAGES];
|
||||
};
|
||||
|
||||
struct ir3_imm_const_state {
|
||||
unsigned size;
|
||||
unsigned count;
|
||||
uint32_t *values;
|
||||
};
|
||||
|
||||
/**
|
||||
* Describes the layout of shader consts in the const register file
|
||||
* and additional info about individual allocations.
|
||||
|
|
@ -289,10 +295,6 @@ struct ir3_const_state {
|
|||
|
||||
struct ir3_const_image_dims image_dims;
|
||||
|
||||
unsigned immediates_count;
|
||||
unsigned immediates_size;
|
||||
uint32_t *immediates;
|
||||
|
||||
/* State of ubo access lowered to push consts: */
|
||||
struct ir3_ubo_analysis_state ubo_state;
|
||||
enum ir3_push_consts_type push_consts_type;
|
||||
|
|
@ -674,6 +676,13 @@ struct ir3_shader_variant {
|
|||
|
||||
struct ir3_const_state *const_state;
|
||||
|
||||
/* Immediate values that will be lowered to const registers. Before a7xx,
|
||||
* this will be uploaded together with the const_state. From a7xx on (where
|
||||
* load_shader_consts_via_preamble is true), this will be lowered to const
|
||||
* stores in the preamble.
|
||||
*/
|
||||
struct ir3_imm_const_state imm_state;
|
||||
|
||||
/*
|
||||
* The following macros are used by the shader disk cache save/
|
||||
* restore paths to serialize/deserialize the variant. Any
|
||||
|
|
@ -1083,6 +1092,7 @@ ir3_max_const(const struct ir3_shader_variant *v)
|
|||
return _ir3_max_const(v, v->key.safe_constlen);
|
||||
}
|
||||
|
||||
bool ir3_const_ensure_imm_size(struct ir3_shader_variant *v, unsigned size);
|
||||
uint16_t ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm);
|
||||
uint16_t ir3_const_add_imm(struct ir3_shader_variant *v, uint32_t imm);
|
||||
|
||||
|
|
|
|||
|
|
@ -1157,7 +1157,8 @@ tu_xs_get_immediates_packet_size_dwords(const struct ir3_shader_variant *xs)
|
|||
{
|
||||
const struct ir3_const_state *const_state = ir3_const_state(xs);
|
||||
uint32_t base = const_state->allocs.max_const_offset_vec4;
|
||||
int32_t size = DIV_ROUND_UP(const_state->immediates_count, 4);
|
||||
const struct ir3_imm_const_state *imm_state = &xs->imm_state;
|
||||
int32_t size = DIV_ROUND_UP(imm_state->count, 4);
|
||||
|
||||
/* truncate size to avoid writing constants that shader
|
||||
* does not use:
|
||||
|
|
@ -1365,6 +1366,7 @@ tu6_emit_xs(struct tu_cs *cs,
|
|||
|
||||
const struct ir3_const_state *const_state = ir3_const_state(xs);
|
||||
uint32_t base = const_state->allocs.max_const_offset_vec4;
|
||||
const struct ir3_imm_const_state *imm_state = &xs->imm_state;
|
||||
unsigned immediate_size = tu_xs_get_immediates_packet_size_dwords(xs);
|
||||
|
||||
if (immediate_size > 0) {
|
||||
|
|
@ -1378,7 +1380,7 @@ tu6_emit_xs(struct tu_cs *cs,
|
|||
tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
|
||||
tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
|
||||
|
||||
tu_cs_emit_array(cs, const_state->immediates, immediate_size);
|
||||
tu_cs_emit_array(cs, imm_state->values, immediate_size);
|
||||
}
|
||||
|
||||
if (const_state->consts_ubo.idx != -1) {
|
||||
|
|
|
|||
|
|
@ -300,7 +300,7 @@ ir3_emit_immediates(const struct ir3_shader_variant *v,
|
|||
{
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
uint32_t base = const_state->allocs.max_const_offset_vec4;
|
||||
int size = DIV_ROUND_UP(const_state->immediates_count, 4);
|
||||
int size = DIV_ROUND_UP(v->imm_state.count, 4);
|
||||
|
||||
/* truncate size to avoid writing constants that shader
|
||||
* does not use:
|
||||
|
|
@ -312,7 +312,7 @@ ir3_emit_immediates(const struct ir3_shader_variant *v,
|
|||
size *= 4;
|
||||
|
||||
if (size > 0)
|
||||
emit_const_user(ring, v, base, size, const_state->immediates);
|
||||
emit_const_user(ring, v, base, size, v->imm_state.values);
|
||||
|
||||
/* NIR constant data has the same lifetime as immediates, so upload it
|
||||
* now, too.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue