ir3: don't modify const state for the binning variant in ir3_cp

ir3_cp uses the const state to lower immediates. It doesn't take the
binning variant into account so in theory, it could add immediates to
the const state for the binning variant, modifying the state after its
layout had already been established for the non-binning variant.

In practice, this probably works fine as the immediates are the last
section of the const state so the layout wouldn't be changed by ir3_cp.
However, there is very little benefit in doing this (the chances for an
immediate being necessary in the binning variant but not in the
non-binning variant are small) so it's better to stick to the global
invariant of not changing the const state in the binning variant.

This commit makes sure that immediates are never added by ir3_cp for the
binning variant. Some refactoring was necessary, however, since the
lookup and modification of the const state were a bit intertwined. More
specifically, currently, the immediate storage in the const state would
always be enlarged by ir3_cp whenever its full, even when it might
actually contain the immediate. To fix this, the logic is split in two
functions: ir3_const_find_imm() for lookup (allowed to be called by the
binning variant), and ir3_const_add_imm for adding immediates (not
allowed to be called by the binning variant).

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30256>
This commit is contained in:
Job Noorman 2024-08-16 08:19:26 +02:00 committed by Marge Bot
parent 24b422dc3e
commit 7036d0fcf7
4 changed files with 66 additions and 31 deletions

View file

@ -524,6 +524,8 @@ regid(int num, int comp)
#define REG_P0 62 /* predicate register */
#define REG_P0_X regid(REG_P0, 0) /* p0.x */
#define INVALID_CONST_REG UINT16_MAX
/* With is_bindless_s2en = 1, this determines whether bindless is enabled and
* if so, how to get the (base, index) pair for both sampler and texture.
* There is a single base embedded in the instruction, which is always used

View file

@ -187,43 +187,20 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
new_flags &= ~IR3_REG_FNEG;
}
/* Reallocate for 4 more elements whenever it's necessary. Note that ir3
* printing relies on having groups of 4 dwords, so we fill the unused
* slots with a dummy value.
*/
struct ir3_const_state *const_state = ir3_const_state(ctx->so);
if (const_state->immediates_count == const_state->immediates_size) {
const_state->immediates = rerzalloc(
const_state, const_state->immediates,
__typeof__(const_state->immediates[0]), const_state->immediates_size,
const_state->immediates_size + 4);
const_state->immediates_size += 4;
reg->num = ir3_const_find_imm(ctx->so, reg->uim_val);
for (int i = const_state->immediates_count;
i < const_state->immediates_size; i++)
const_state->immediates[i] = 0xd0d0d0d0;
}
int i;
for (i = 0; i < const_state->immediates_count; i++) {
if (const_state->immediates[i] == reg->uim_val)
break;
}
if (i == const_state->immediates_count) {
/* Add on a new immediate to be pushed, if we have space left in the
* constbuf.
*/
if (const_state->offsets.immediate + const_state->immediates_count / 4 >=
ir3_max_const(ctx->so))
if (reg->num == INVALID_CONST_REG) {
/* Don't modify the const state for the binning variant. */
if (ctx->so->binning_pass)
return false;
const_state->immediates[i] = reg->uim_val;
const_state->immediates_count++;
reg->num = ir3_const_add_imm(ctx->so, reg->uim_val);
if (reg->num == INVALID_CONST_REG)
return false;
}
reg->flags = new_flags;
reg->num = i + (4 * const_state->offsets.immediate);
instr->srcs[n] = reg;

View file

@ -43,6 +43,59 @@
#include "disasm.h"
static uint16_t
const_imm_index_to_reg(const struct ir3_const_state *const_state, unsigned i)
{
return i + (4 * const_state->offsets.immediate);
}
uint16_t
ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm)
{
const struct ir3_const_state *const_state = ir3_const_state(v);
for (unsigned i = 0; i < const_state->immediates_count; i++) {
if (const_state->immediates[i] == imm)
return const_imm_index_to_reg(const_state, i);
}
return INVALID_CONST_REG;
}
uint16_t
ir3_const_add_imm(struct ir3_shader_variant *v, uint32_t imm)
{
struct ir3_const_state *const_state = ir3_const_state(v);
/* Reallocate for 4 more elements whenever it's necessary. Note that ir3
* printing relies on having groups of 4 dwords, so we fill the unused
* slots with a dummy value.
*/
if (const_state->immediates_count == const_state->immediates_size) {
const_state->immediates = rerzalloc(
const_state, const_state->immediates,
__typeof__(const_state->immediates[0]), const_state->immediates_size,
const_state->immediates_size + 4);
const_state->immediates_size += 4;
for (int i = const_state->immediates_count;
i < const_state->immediates_size; i++) {
const_state->immediates[i] = 0xd0d0d0d0;
}
}
/* Add on a new immediate to be pushed, if we have space left in the
* constbuf.
*/
if (const_state->offsets.immediate + const_state->immediates_count / 4 >=
ir3_max_const(v)) {
return INVALID_CONST_REG;
}
const_state->immediates[const_state->immediates_count] = imm;
return const_imm_index_to_reg(const_state, const_state->immediates_count++);
}
int
ir3_glsl_type_size(const struct glsl_type *type, bool bindless)
{

View file

@ -986,6 +986,9 @@ ir3_max_const(const struct ir3_shader_variant *v)
return _ir3_max_const(v, v->key.safe_constlen);
}
uint16_t ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm);
uint16_t ir3_const_add_imm(struct ir3_shader_variant *v, uint32_t imm);
/* Return true if a variant may need to be recompiled due to exceeding the
* maximum "safe" constlen.
*/