From 17b699ae24f63436f0a16456bb798dfcb8eba708 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Wed, 6 May 2026 20:07:44 -0400 Subject: [PATCH] i915/corm: use hardware swizzle constants for vec3/4 load_const When a vec3 or vec4 load_const has all components from {0, 1, -1}, emit a swizzle+negate alias using the hardware ZERO/ONE source constants instead of allocating a constant register via i915_emit_const4fv. This matches what the TGSI path does through its immediate recognition. Saves a constant register slot per qualifying load_const and converts 32 of 33 previous ties to identical output. shader-db (I915_FS=nir): 212/403 compiled, 3227 alu shader-db (I915_FS=both): nir won 212 (26 identical, 1 tied, 182 better, 3 only), 75 TGSI, 116 neither Assisted-by: Claude --- src/gallium/drivers/i915/i915_fpc_nir.c | 34 ++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/i915/i915_fpc_nir.c b/src/gallium/drivers/i915/i915_fpc_nir.c index 99a61d385c3..27f57ece08b 100644 --- a/src/gallium/drivers/i915/i915_fpc_nir.c +++ b/src/gallium/drivers/i915/i915_fpc_nir.c @@ -216,13 +216,39 @@ emit_load_const(struct nir_to_i915 *c, nir_load_const_instr *load) break; case 3: case 4: { + unsigned n = load->def.num_components; float v[4] = { load->value[0].f32, - load->def.num_components > 1 ? load->value[1].f32 : 0.0f, - load->def.num_components > 2 ? load->value[2].f32 : 0.0f, - load->def.num_components > 3 ? load->value[3].f32 : 0.0f, + n > 1 ? load->value[1].f32 : 0.0f, + n > 2 ? load->value[2].f32 : 0.0f, + n > 3 ? load->value[3].f32 : 0.0f, }; - set_ureg(c, &load->def, i915_emit_const4fv(p, v)); + + uint32_t ch[4] = { X, Y, Z, W }; + int ng[4] = { 0, 0, 0, 0 }; + bool all_swizzle = true; + for (unsigned i = 0; i < n; i++) { + if (v[i] == 0.0f) + ch[i] = ZERO; + else if (v[i] == 1.0f) + ch[i] = ONE; + else if (v[i] == -1.0f) { + ch[i] = ONE; + ng[i] = 1; + } else { + all_swizzle = false; + break; + } + } + + if (all_swizzle) { + set_ureg(c, &load->def, + negate(swizzle(UREG(REG_TYPE_R, 0), + ch[0], ch[1], ch[2], ch[3]), + ng[0], ng[1], ng[2], ng[3])); + } else { + set_ureg(c, &load->def, i915_emit_const4fv(p, v)); + } break; } default: