mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 11:18:08 +02:00
i915/corm: multi-component ALU dest folding in vec construction
Generalize the scalar ALU dest fold to handle multi-component results. When a vec source covers contiguous channels with identity swizzle and all uses of the source come from this vec, patch the ALU instruction to write directly into the vec's dest register with the appropriate channel mask.

This eliminates redundant MOVs for patterns like vec4(%result.x, %result.y, %result.z, %other) where %result is a vec3 ALU output — the ALU instruction now writes directly to the output register's .xyz channels.

shader-db (I915_FS=nir): 233/403 compiled, 3328 alu
shader-db (I915_FS=both): nir won 233 (26 identical, 1 tied, 203 better, 3 only), 54 TGSI, 116 neither

Assisted-by: Claude
This commit is contained in:
parent
879cf1bd74
commit
595b9850e0
1 changed files with 28 additions and 7 deletions
|
|
@ -550,27 +550,48 @@ emit_alu(struct nir_to_i915 *c, nir_alu_instr *alu)
|
|||
neg_sel[i] = (srcs[i] >> UREG_CHANNEL_X_NEGATE_SHIFT) & 0x1;
|
||||
}
|
||||
|
||||
/* Single-component ALU dest folding: if a vec source is a single-use
|
||||
* scalar ALU result in a temp, patch that instruction to write directly
|
||||
* into our dest with the right channel mask.
|
||||
/* ALU dest folding: if a vec source is a single-use ALU result in a
|
||||
* temp with identity swizzle, patch that instruction to write
|
||||
* directly into our dest with the right channel mask.
|
||||
*/
|
||||
for (unsigned i = 0; i < n; i++) {
|
||||
nir_def *src_def = alu->src[i].src.ssa;
|
||||
uint32_t *prev_csr = c->def_csr[src_def->index];
|
||||
if (!prev_csr || !list_is_singular(&src_def->uses))
|
||||
if (!prev_csr)
|
||||
continue;
|
||||
if (GET_UREG_TYPE(srcs[i]) != REG_TYPE_R)
|
||||
continue;
|
||||
if (src_def->num_components != 1)
|
||||
unsigned nc = src_def->num_components;
|
||||
if (i + nc > n)
|
||||
continue;
|
||||
bool identity = true;
|
||||
for (unsigned j = 0; j < nc && identity; j++)
|
||||
identity = (j == 0 || alu->src[i + j].src.ssa == src_def) &&
|
||||
(alu->src[i + j].swizzle[0] == j);
|
||||
if (!identity)
|
||||
continue;
|
||||
bool all_from_this_vec = true;
|
||||
nir_foreach_use(use, src_def) {
|
||||
if (nir_src_use_instr(use) != &alu->instr) {
|
||||
all_from_this_vec = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!all_from_this_vec)
|
||||
continue;
|
||||
|
||||
uint32_t fold_mask = 0;
|
||||
for (unsigned j = 0; j < nc; j++)
|
||||
fold_mask |= chan_mask[i + j];
|
||||
|
||||
prev_csr[0] = (prev_csr[0] & ~(A0_DEST_CHANNEL_ALL |
|
||||
(0x1ff << A0_DEST_NR_SHIFT))) |
|
||||
A0_DEST(dest) | chan_mask[i];
|
||||
A0_DEST(dest) | fold_mask;
|
||||
|
||||
i915_release_temp(p, GET_UREG_NR(srcs[i]));
|
||||
c->ureg_map[src_def->index] = dest;
|
||||
emitted[i] = true;
|
||||
for (unsigned j = 0; j < nc; j++)
|
||||
emitted[i + j] = true;
|
||||
}
|
||||
|
||||
/* Process real-register sources first, folding in any ZERO/ONE
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue