mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-22 09:40:40 +02:00
ir3: don't modify const state for the binning variant in ir3_cp
ir3_cp uses the const state to lower immediates. It doesn't take the binning variant into account so in theory, it could add immediates to the const state for the binning variant, modifying the state after its layout had already been established for the non-binning variant. In practice, this probably works fine as the immediates are the last section of the const state so the layout wouldn't be changed by ir3_cp. However, there is very little benefit in doing this (the chances for an immediate being necessary in the binning variant but not in the non-binning variant are small) so it's better to stick to the global invariant of not changing the const state in the binning variant. This commit makes sure that immediates are never added by ir3_cp for the binning variant. Some refactoring was necessary, however, since the lookup and modification of the const state were a bit intertwined. More specifically, currently, the immediate storage in the const state would always be enlarged by ir3_cp whenever it's full, even when it might actually contain the immediate. To fix this, the logic is split in two functions: ir3_const_find_imm() for lookup (allowed to be called by the binning variant), and ir3_const_add_imm() for adding immediates (not allowed to be called by the binning variant). Signed-off-by: Job Noorman <jnoorman@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30256>
This commit is contained in:
parent
24b422dc3e
commit
7036d0fcf7
4 changed files with 66 additions and 31 deletions
|
|
@ -524,6 +524,8 @@ regid(int num, int comp)
|
|||
#define REG_P0 62 /* predicate register */
|
||||
#define REG_P0_X regid(REG_P0, 0) /* p0.x */
|
||||
|
||||
#define INVALID_CONST_REG UINT16_MAX
|
||||
|
||||
/* With is_bindless_s2en = 1, this determines whether bindless is enabled and
|
||||
* if so, how to get the (base, index) pair for both sampler and texture.
|
||||
* There is a single base embedded in the instruction, which is always used
|
||||
|
|
|
|||
|
|
@ -187,43 +187,20 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
|
|||
new_flags &= ~IR3_REG_FNEG;
|
||||
}
|
||||
|
||||
/* Reallocate for 4 more elements whenever it's necessary. Note that ir3
|
||||
* printing relies on having groups of 4 dwords, so we fill the unused
|
||||
* slots with a dummy value.
|
||||
*/
|
||||
struct ir3_const_state *const_state = ir3_const_state(ctx->so);
|
||||
if (const_state->immediates_count == const_state->immediates_size) {
|
||||
const_state->immediates = rerzalloc(
|
||||
const_state, const_state->immediates,
|
||||
__typeof__(const_state->immediates[0]), const_state->immediates_size,
|
||||
const_state->immediates_size + 4);
|
||||
const_state->immediates_size += 4;
|
||||
reg->num = ir3_const_find_imm(ctx->so, reg->uim_val);
|
||||
|
||||
for (int i = const_state->immediates_count;
|
||||
i < const_state->immediates_size; i++)
|
||||
const_state->immediates[i] = 0xd0d0d0d0;
|
||||
}
|
||||
|
||||
int i;
|
||||
for (i = 0; i < const_state->immediates_count; i++) {
|
||||
if (const_state->immediates[i] == reg->uim_val)
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == const_state->immediates_count) {
|
||||
/* Add on a new immediate to be pushed, if we have space left in the
|
||||
* constbuf.
|
||||
*/
|
||||
if (const_state->offsets.immediate + const_state->immediates_count / 4 >=
|
||||
ir3_max_const(ctx->so))
|
||||
if (reg->num == INVALID_CONST_REG) {
|
||||
/* Don't modify the const state for the binning variant. */
|
||||
if (ctx->so->binning_pass)
|
||||
return false;
|
||||
|
||||
const_state->immediates[i] = reg->uim_val;
|
||||
const_state->immediates_count++;
|
||||
reg->num = ir3_const_add_imm(ctx->so, reg->uim_val);
|
||||
|
||||
if (reg->num == INVALID_CONST_REG)
|
||||
return false;
|
||||
}
|
||||
|
||||
reg->flags = new_flags;
|
||||
reg->num = i + (4 * const_state->offsets.immediate);
|
||||
|
||||
instr->srcs[n] = reg;
|
||||
|
||||
|
|
|
|||
|
|
@ -43,6 +43,59 @@
|
|||
|
||||
#include "disasm.h"
|
||||
|
||||
static uint16_t
|
||||
const_imm_index_to_reg(const struct ir3_const_state *const_state, unsigned i)
|
||||
{
|
||||
return i + (4 * const_state->offsets.immediate);
|
||||
}
|
||||
|
||||
uint16_t
|
||||
ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm)
|
||||
{
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
|
||||
for (unsigned i = 0; i < const_state->immediates_count; i++) {
|
||||
if (const_state->immediates[i] == imm)
|
||||
return const_imm_index_to_reg(const_state, i);
|
||||
}
|
||||
|
||||
return INVALID_CONST_REG;
|
||||
}
|
||||
|
||||
uint16_t
|
||||
ir3_const_add_imm(struct ir3_shader_variant *v, uint32_t imm)
|
||||
{
|
||||
struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
|
||||
/* Reallocate for 4 more elements whenever it's necessary. Note that ir3
|
||||
* printing relies on having groups of 4 dwords, so we fill the unused
|
||||
* slots with a dummy value.
|
||||
*/
|
||||
if (const_state->immediates_count == const_state->immediates_size) {
|
||||
const_state->immediates = rerzalloc(
|
||||
const_state, const_state->immediates,
|
||||
__typeof__(const_state->immediates[0]), const_state->immediates_size,
|
||||
const_state->immediates_size + 4);
|
||||
const_state->immediates_size += 4;
|
||||
|
||||
for (int i = const_state->immediates_count;
|
||||
i < const_state->immediates_size; i++) {
|
||||
const_state->immediates[i] = 0xd0d0d0d0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Add on a new immediate to be pushed, if we have space left in the
|
||||
* constbuf.
|
||||
*/
|
||||
if (const_state->offsets.immediate + const_state->immediates_count / 4 >=
|
||||
ir3_max_const(v)) {
|
||||
return INVALID_CONST_REG;
|
||||
}
|
||||
|
||||
const_state->immediates[const_state->immediates_count] = imm;
|
||||
return const_imm_index_to_reg(const_state, const_state->immediates_count++);
|
||||
}
|
||||
|
||||
int
|
||||
ir3_glsl_type_size(const struct glsl_type *type, bool bindless)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -986,6 +986,9 @@ ir3_max_const(const struct ir3_shader_variant *v)
|
|||
return _ir3_max_const(v, v->key.safe_constlen);
|
||||
}
|
||||
|
||||
/* Look up an immediate already stored in the variant's const state. Returns
 * the const register holding it, or INVALID_CONST_REG if it is not present.
 */
uint16_t ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm);
|
||||
/* Append an immediate to the variant's const state, growing the storage if
 * needed. Returns the const register assigned to it, or INVALID_CONST_REG if
 * the constbuf has no space left. Modifies the const state, so it is not
 * meant to be called for the binning variant.
 */
uint16_t ir3_const_add_imm(struct ir3_shader_variant *v, uint32_t imm);
|
||||
|
||||
/* Return true if a variant may need to be recompiled due to exceeding the
|
||||
* maximum "safe" constlen.
|
||||
*/
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue