mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
i915/corm: use hardware swizzle constants for vec3/4 load_const
When a vec3 or vec4 load_const has all components from {0, 1, -1},
emit a swizzle+negate alias using the hardware ZERO/ONE source
constants instead of allocating a constant register via
i915_emit_const4fv. This matches what the TGSI path does through
its immediate recognition.
Saves a constant register slot per qualifying load_const and
converts 32 of 33 previous ties to identical output.
shader-db (I915_FS=nir): 212/403 compiled, 3227 alu
shader-db (I915_FS=both): nir won 212 (26 identical, 1 tied, 182 better, 3 only),
75 TGSI, 116 neither
Assisted-by: Claude
This commit is contained in:
parent
4885eb02ab
commit
17b699ae24
1 changed file with 30 additions and 4 deletions
|
|
@ -216,13 +216,39 @@ emit_load_const(struct nir_to_i915 *c, nir_load_const_instr *load)
|
|||
break;
|
||||
case 3:
|
||||
case 4: {
|
||||
unsigned n = load->def.num_components;
|
||||
float v[4] = {
|
||||
load->value[0].f32,
|
||||
load->def.num_components > 1 ? load->value[1].f32 : 0.0f,
|
||||
load->def.num_components > 2 ? load->value[2].f32 : 0.0f,
|
||||
load->def.num_components > 3 ? load->value[3].f32 : 0.0f,
|
||||
n > 1 ? load->value[1].f32 : 0.0f,
|
||||
n > 2 ? load->value[2].f32 : 0.0f,
|
||||
n > 3 ? load->value[3].f32 : 0.0f,
|
||||
};
|
||||
set_ureg(c, &load->def, i915_emit_const4fv(p, v));
|
||||
|
||||
uint32_t ch[4] = { X, Y, Z, W };
|
||||
int ng[4] = { 0, 0, 0, 0 };
|
||||
bool all_swizzle = true;
|
||||
for (unsigned i = 0; i < n; i++) {
|
||||
if (v[i] == 0.0f)
|
||||
ch[i] = ZERO;
|
||||
else if (v[i] == 1.0f)
|
||||
ch[i] = ONE;
|
||||
else if (v[i] == -1.0f) {
|
||||
ch[i] = ONE;
|
||||
ng[i] = 1;
|
||||
} else {
|
||||
all_swizzle = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (all_swizzle) {
|
||||
set_ureg(c, &load->def,
|
||||
negate(swizzle(UREG(REG_TYPE_R, 0),
|
||||
ch[0], ch[1], ch[2], ch[3]),
|
||||
ng[0], ng[1], ng[2], ng[3]));
|
||||
} else {
|
||||
set_ureg(c, &load->def, i915_emit_const4fv(p, v));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue