mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
i915/corm: add fsat folding, output dest folding, and vec dest folding
Add the def_csr mechanism: track the instruction cursor position for each single-instruction SSA def so we can retroactively patch it.

fsat folding: when a single-use SSA def feeds into fsat, fold A0_DEST_SATURATE into the previous instruction instead of emitting a separate MOV.

Output dest folding: when store_output consumes a single-use temp, patch the previous instruction to write directly to the output register (OC/OD). Includes vec look-through for the identity-swizzle case where a vec was collapsed to a register alias.

Vec dest folding: single-use scalar ALU results feeding a vec component get patched to write directly into the vec dest register.

shader-db (I915_FS=nir): 209/403 compiled, 3157 alu
shader-db (I915_FS=both): nir won 209 (26 identical, 16 tied, 164 better, 3 only), 78 TGSI, 116 neither

Assisted-by: Claude
This commit is contained in:
parent
ed934ae17b
commit
28400d7c6c
1 changed files with 84 additions and 1 deletions
|
|
@ -21,6 +21,7 @@ struct nir_to_i915 {
|
|||
struct i915_fragment_shader *ifs;
|
||||
|
||||
uint32_t *ureg_map;
|
||||
uint32_t **def_csr;
|
||||
unsigned ureg_map_size;
|
||||
|
||||
int *last_use;
|
||||
|
|
@ -221,6 +222,8 @@ emit_alu(struct nir_to_i915 *c, nir_alu_instr *alu)
|
|||
if (nir_op_infos[alu->op].num_inputs >= 3)
|
||||
src2 = alu_src_ureg(c, &alu->src[2]);
|
||||
|
||||
uint32_t *pre_csr = p->csr;
|
||||
|
||||
switch (alu->op) {
|
||||
case nir_op_mov:
|
||||
case nir_op_fcanonicalize:
|
||||
|
|
@ -237,9 +240,22 @@ emit_alu(struct nir_to_i915 *c, nir_alu_instr *alu)
|
|||
i915_emit_arith(p, A0_MAX, dest, mask, 0,
|
||||
src0, negate(src0, 1, 1, 1, 1), 0);
|
||||
break;
|
||||
case nir_op_fsat:
|
||||
case nir_op_fsat: {
|
||||
nir_def *src_def = alu->src[0].src.ssa;
|
||||
uint32_t *prev = c->def_csr[src_def->index];
|
||||
if (prev && list_is_singular(&src_def->uses)) {
|
||||
prev[0] |= A0_DEST_SATURATE;
|
||||
i915_release_temp(p, GET_UREG_NR(dest));
|
||||
set_ureg(c, def, src_ureg(c, &alu->src[0].src));
|
||||
c->def_csr[def->index] = prev;
|
||||
unsigned src_idx = alu->src[0].src.ssa->index;
|
||||
if (c->last_use[src_idx] == c->ip)
|
||||
c->last_use[src_idx] = c->last_use[def->index];
|
||||
return;
|
||||
}
|
||||
i915_emit_arith(p, A0_MOV, dest, mask, A0_DEST_SATURATE, src0, 0, 0);
|
||||
break;
|
||||
}
|
||||
case nir_op_fadd:
|
||||
i915_emit_arith(p, A0_ADD, dest, mask, 0, src0, src1, 0);
|
||||
break;
|
||||
|
|
@ -399,6 +415,29 @@ emit_alu(struct nir_to_i915 *c, nir_alu_instr *alu)
|
|||
neg_sel[i] = (srcs[i] >> UREG_CHANNEL_X_NEGATE_SHIFT) & 0x1;
|
||||
}
|
||||
|
||||
/* Single-component ALU dest folding: if a vec source is a single-use
|
||||
* scalar ALU result in a temp, patch that instruction to write directly
|
||||
* into our dest with the right channel mask.
|
||||
*/
|
||||
for (unsigned i = 0; i < n; i++) {
|
||||
nir_def *src_def = alu->src[i].src.ssa;
|
||||
uint32_t *prev_csr = c->def_csr[src_def->index];
|
||||
if (!prev_csr || !list_is_singular(&src_def->uses))
|
||||
continue;
|
||||
if (GET_UREG_TYPE(srcs[i]) != REG_TYPE_R)
|
||||
continue;
|
||||
if (src_def->num_components != 1)
|
||||
continue;
|
||||
|
||||
prev_csr[0] = (prev_csr[0] & ~(A0_DEST_CHANNEL_ALL |
|
||||
(0x1ff << A0_DEST_NR_SHIFT))) |
|
||||
A0_DEST(dest) | chan_mask[i];
|
||||
|
||||
i915_release_temp(p, GET_UREG_NR(srcs[i]));
|
||||
c->ureg_map[src_def->index] = dest;
|
||||
emitted[i] = true;
|
||||
}
|
||||
|
||||
/* Process real-register sources first, folding in any ZERO/ONE
|
||||
* const-swizzle sources that can piggyback on the same MOV.
|
||||
* Use the unswizzled base register since swizzle() composes.
|
||||
|
|
@ -471,6 +510,9 @@ emit_alu(struct nir_to_i915 *c, nir_alu_instr *alu)
|
|||
break;
|
||||
}
|
||||
|
||||
if (p->csr == pre_csr + 3)
|
||||
c->def_csr[def->index] = pre_csr;
|
||||
|
||||
i915_release_utemps(p);
|
||||
}
|
||||
|
||||
|
|
@ -640,6 +682,45 @@ emit_intrinsic(struct nir_to_i915 *c, nir_intrinsic_instr *intr)
|
|||
dest = UREG(REG_TYPE_OC, 0);
|
||||
}
|
||||
|
||||
nir_def *src_def = intr->src[0].ssa;
|
||||
uint32_t *prev = c->def_csr[src_def->index];
|
||||
|
||||
/* Look through identity vec (same_reg case emits no instructions).
|
||||
* Check that all uses of the underlying def come from this vec.
|
||||
*/
|
||||
bool looked_through_vec = false;
|
||||
if (!prev) {
|
||||
nir_instr *def_instr = nir_def_instr_nonconst(src_def);
|
||||
if (def_instr->type == nir_instr_type_alu) {
|
||||
nir_alu_instr *vec = nir_instr_as_alu(def_instr);
|
||||
if ((vec->op == nir_op_vec4 || vec->op == nir_op_vec3 ||
|
||||
vec->op == nir_op_vec2) &&
|
||||
list_is_singular(&src_def->uses)) {
|
||||
nir_def *inner = vec->src[0].src.ssa;
|
||||
bool all_from_vec = true;
|
||||
nir_foreach_use(use, inner) {
|
||||
if (nir_src_use_instr(use) != def_instr) {
|
||||
all_from_vec = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (all_from_vec) {
|
||||
src_def = inner;
|
||||
prev = c->def_csr[src_def->index];
|
||||
looked_through_vec = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (prev && comp == 0 &&
|
||||
(looked_through_vec || list_is_singular(&src_def->uses))) {
|
||||
prev[0] = (prev[0] & ~(A0_DEST_CHANNEL_ALL |
|
||||
(0x1ff << A0_DEST_NR_SHIFT))) |
|
||||
A0_DEST(dest) | writemask_to_mask(wm);
|
||||
break;
|
||||
}
|
||||
|
||||
if (comp > 0) {
|
||||
uint32_t s[4] = { X, Y, Z, W };
|
||||
for (int i = 3; i >= (int)comp; i--)
|
||||
|
|
@ -855,6 +936,7 @@ i915_translate_fragment_program_nir(struct i915_context *i915,
|
|||
.opts = *opts,
|
||||
.ureg_map_size = impl->ssa_alloc,
|
||||
.ureg_map = CALLOC(impl->ssa_alloc, sizeof(uint32_t)),
|
||||
.def_csr = CALLOC(impl->ssa_alloc, sizeof(uint32_t *)),
|
||||
.last_use = CALLOC(impl->ssa_alloc, sizeof(int)),
|
||||
};
|
||||
|
||||
|
|
@ -935,6 +1017,7 @@ cleanup:
|
|||
ralloc_free(p->error);
|
||||
|
||||
FREE(c.last_use);
|
||||
FREE(c.def_csr);
|
||||
FREE(c.ureg_map);
|
||||
FREE(p);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue