mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
i915/corm: deferred constant allocation with per-channel UBO mixing
When opts.deferred_const is set, defer scalar load_const allocation until the consuming ALU instruction. coalesce_constants resolves deferred constants with a preferred register hint so that co-occurring constants pack into the same CONST register, avoiding dual-constant MOV penalties.

Also fix per-channel UBO constant flags: mark only the channels that are actually loaded with I915_CONSTFLAG_USER_CH(comp+i) instead of setting all user bits, leaving the free channels available for immediates.

shader-db (I915_FS=nir): 210/403 compiled, 3202 alu
shader-db (I915_FS=both): nir won 210 (26 identical, 16 tied, 165 better, 3 only), 77 TGSI, 116 neither

Assisted-by: Claude
This commit is contained in:
parent
28400d7c6c
commit
75ef9f6d65
1 changed files with 64 additions and 4 deletions
|
|
@ -22,6 +22,7 @@ struct nir_to_i915 {
|
|||
|
||||
uint32_t *ureg_map;
|
||||
uint32_t **def_csr;
|
||||
float *deferred_const;
|
||||
unsigned ureg_map_size;
|
||||
|
||||
int *last_use;
|
||||
|
|
@ -75,10 +76,28 @@ set_ureg(struct nir_to_i915 *c, nir_def *def, uint32_t ureg)
|
|||
c->ureg_map[def->index] = ureg;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_deferred(struct nir_to_i915 *c, unsigned ssa_index)
|
||||
{
|
||||
return c->ureg_map[ssa_index] == UREG_BAD;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
resolve_const(struct nir_to_i915 *c, unsigned ssa_index, int preferred_reg)
|
||||
{
|
||||
uint32_t ureg = i915_emit_const1f_prefer(c->p,
|
||||
c->deferred_const[ssa_index],
|
||||
preferred_reg);
|
||||
c->ureg_map[ssa_index] = ureg;
|
||||
return ureg;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
src_ureg(struct nir_to_i915 *c, nir_src *src)
|
||||
{
|
||||
assert(src->ssa->index < c->ureg_map_size);
|
||||
if (c->ureg_map[src->ssa->index] == UREG_BAD)
|
||||
resolve_const(c, src->ssa->index, -1);
|
||||
return c->ureg_map[src->ssa->index];
|
||||
}
|
||||
|
||||
|
|
@ -179,9 +198,17 @@ emit_load_const(struct nir_to_i915 *c, nir_load_const_instr *load)
|
|||
struct i915_fp_compile *p = c->p;
|
||||
|
||||
switch (load->def.num_components) {
|
||||
case 1:
|
||||
set_ureg(c, &load->def, i915_emit_const1f(p, load->value[0].f32));
|
||||
case 1: {
|
||||
float val = load->value[0].f32;
|
||||
if (c->opts.deferred_const &&
|
||||
val != 0.0f && val != 1.0f && val != -1.0f) {
|
||||
c->deferred_const[load->def.index] = val;
|
||||
set_ureg(c, &load->def, UREG_BAD);
|
||||
} else {
|
||||
set_ureg(c, &load->def, i915_emit_const1f(p, val));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 2:
|
||||
set_ureg(c, &load->def,
|
||||
i915_emit_const2f(p, load->value[0].f32,
|
||||
|
|
@ -205,6 +232,35 @@ emit_load_const(struct nir_to_i915 *c, nir_load_const_instr *load)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
coalesce_constants(struct nir_to_i915 *c, nir_alu_instr *alu)
|
||||
{
|
||||
unsigned n = nir_op_infos[alu->op].num_inputs;
|
||||
unsigned deferred[3];
|
||||
unsigned nr_deferred = 0;
|
||||
int preferred = -1;
|
||||
|
||||
for (unsigned i = 0; i < n; i++) {
|
||||
unsigned idx = alu->src[i].src.ssa->index;
|
||||
if (is_deferred(c, idx)) {
|
||||
deferred[nr_deferred++] = idx;
|
||||
} else {
|
||||
uint32_t ureg = c->ureg_map[idx];
|
||||
if (GET_UREG_TYPE(ureg) == REG_TYPE_CONST && preferred < 0)
|
||||
preferred = GET_UREG_NR(ureg);
|
||||
}
|
||||
}
|
||||
|
||||
if (nr_deferred == 0)
|
||||
return;
|
||||
|
||||
for (unsigned i = 0; i < nr_deferred; i++) {
|
||||
uint32_t ureg = resolve_const(c, deferred[i], preferred);
|
||||
if (preferred < 0 && GET_UREG_TYPE(ureg) == REG_TYPE_CONST)
|
||||
preferred = GET_UREG_NR(ureg);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_alu(struct nir_to_i915 *c, nir_alu_instr *alu)
|
||||
{
|
||||
|
|
@ -214,6 +270,8 @@ emit_alu(struct nir_to_i915 *c, nir_alu_instr *alu)
|
|||
uint32_t dest = UREG(REG_TYPE_R, i915_get_temp(p));
|
||||
set_ureg(c, def, dest);
|
||||
|
||||
coalesce_constants(c, alu);
|
||||
|
||||
uint32_t src0 = 0, src1 = 0, src2 = 0;
|
||||
if (nir_op_infos[alu->op].num_inputs >= 1)
|
||||
src0 = alu_src_ureg(c, &alu->src[0]);
|
||||
|
|
@ -756,7 +814,7 @@ emit_intrinsic(struct nir_to_i915 *c, nir_intrinsic_instr *intr)
|
|||
}
|
||||
|
||||
for (unsigned i = 0; i < intr->def.num_components; i++)
|
||||
ifs->constant_flags[slot] |= I915_CONSTFLAG_USER;
|
||||
ifs->constant_flags[slot] |= I915_CONSTFLAG_USER_CH(comp + i);
|
||||
ifs->num_constants = MAX2(ifs->num_constants, slot + 1);
|
||||
|
||||
uint32_t reg = UREG(REG_TYPE_CONST, slot);
|
||||
|
|
@ -791,7 +849,7 @@ emit_intrinsic(struct nir_to_i915 *c, nir_intrinsic_instr *intr)
|
|||
}
|
||||
|
||||
for (unsigned i = 0; i < intr->def.num_components; i++)
|
||||
ifs->constant_flags[slot] |= I915_CONSTFLAG_USER;
|
||||
ifs->constant_flags[slot] |= I915_CONSTFLAG_USER_CH(comp + i);
|
||||
ifs->num_constants = MAX2(ifs->num_constants, slot + 1);
|
||||
|
||||
uint32_t reg = UREG(REG_TYPE_CONST, slot);
|
||||
|
|
@ -937,6 +995,7 @@ i915_translate_fragment_program_nir(struct i915_context *i915,
|
|||
.ureg_map_size = impl->ssa_alloc,
|
||||
.ureg_map = CALLOC(impl->ssa_alloc, sizeof(uint32_t)),
|
||||
.def_csr = CALLOC(impl->ssa_alloc, sizeof(uint32_t *)),
|
||||
.deferred_const = CALLOC(impl->ssa_alloc, sizeof(float)),
|
||||
.last_use = CALLOC(impl->ssa_alloc, sizeof(int)),
|
||||
};
|
||||
|
||||
|
|
@ -1017,6 +1076,7 @@ cleanup:
|
|||
ralloc_free(p->error);
|
||||
|
||||
FREE(c.last_use);
|
||||
FREE(c.deferred_const);
|
||||
FREE(c.def_csr);
|
||||
FREE(c.ureg_map);
|
||||
FREE(p);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue