From 4087e3b7ef1e0b9307b84e8a7b94ba6f0bf7aae9 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Wed, 6 May 2026 12:43:25 -0400 Subject: [PATCH] i915: refactor constant and compiler infrastructure for NIR backend Rework the constant register encoding to track per-channel ownership (I915_CONSTFLAG_IMM / I915_CONSTFLAG_USER_CH) instead of whole-register flags, allowing compiler immediates and user UBO values to share a constant register on different channels. Update emit_constants() to handle per-channel source selection at upload time. Add i915_emit_const1f_prefer() for packing scalar constants into a preferred register, reducing dual-constant conflicts. Move i915_program_error(), i915_use_passthrough_shader(), and negate() from i915_fpc_translate.c to shared locations (i915_fpc_emit.c / i915_fpc.h) so the NIR backend can use them. Fix i915_emit_texld() to use a utemp instead of a temp register for texcoord swizzle copies, avoiding unnecessary tex indirect phase boundaries. Add a fallback path that copies to a utemp when bumping the phase count would exceed the hardware limit. Add nr_alu_insn, nr_tex_insn, nr_tex_indirect, nr_temps, writes_z, and input semantic tracking to i915_fragment_shader for use by the NIR backend's multi-variant comparison framework. 
Assisted-by: Claude --- src/gallium/drivers/i915/i915_context.h | 24 ++- src/gallium/drivers/i915/i915_fpc.h | 23 +++ src/gallium/drivers/i915/i915_fpc_emit.c | 159 +++++++++++++----- src/gallium/drivers/i915/i915_fpc_translate.c | 57 +------ src/gallium/drivers/i915/i915_state_emit.c | 43 ++--- 5 files changed, 190 insertions(+), 116 deletions(-) diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h index 0bbbd66662b..ef81f69740c 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -88,8 +88,15 @@ struct i915_winsys_batchbuffer; #define I915_MAX_CONSTANT 32 -/** See constant_flags[] below */ -#define I915_CONSTFLAG_USER 0x1f +/** + * Per-channel flags for constant_flags[]. + * Bits 0-3: channel has a compiler immediate. + * Bits 4-7: channel has a user (UBO) value uploaded at draw time. + * A channel is available when neither bit is set. + */ +#define I915_CONSTFLAG_IMM(ch) (1 << (ch)) +#define I915_CONSTFLAG_USER_CH(ch) (1 << ((ch) + 4)) +#define I915_CONSTFLAG_USER 0xf0 /** * Subclass of pipe_shader_state @@ -103,6 +110,10 @@ struct i915_fragment_shader { uint32_t *program; uint32_t program_len; + uint32_t nr_alu_insn; + uint32_t nr_tex_insn; + uint32_t nr_tex_indirect; + uint32_t nr_temps; /** * constants introduced during translation. @@ -134,12 +145,15 @@ struct i915_fragment_shader { } texcoords[I915_TEX_UNITS]; bool reads_pntc; + bool writes_z; + + unsigned num_inputs; + uint8_t input_semantic_name[PIPE_MAX_SHADER_INPUTS]; + uint8_t input_semantic_index[PIPE_MAX_SHADER_INPUTS]; - /* Set if the shader is an internal (blit, etc.) shader that shouldn't debug - * log by default. 
*/ bool internal; - char *error; /* Any error message from compiling this shader (or NULL) */ + char *error; }; struct i915_cache_context; diff --git a/src/gallium/drivers/i915/i915_fpc.h b/src/gallium/drivers/i915/i915_fpc.h index d234042dea2..fe0d0f1e544 100644 --- a/src/gallium/drivers/i915/i915_fpc.h +++ b/src/gallium/drivers/i915/i915_fpc.h @@ -136,6 +136,15 @@ swizzle(int reg, uint32_t x, uint32_t y, uint32_t z, uint32_t w) CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3)); } +static inline int +negate(int reg, int x, int y, int z, int w) +{ + return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | + ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | + ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | + ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); /* XOR toggles the negate bits; only bit 0 of each flag is used, as before the move */ +} + #define A0_DEST(reg) (((reg)&UREG_TYPE_NR_MASK) >> UREG_A0_DEST_SHIFT_LEFT) #define D0_DEST(reg) (((reg)&UREG_TYPE_NR_MASK) >> UREG_A0_DEST_SHIFT_LEFT) #define T0_DEST(reg) (((reg)&UREG_TYPE_NR_MASK) >> UREG_A0_DEST_SHIFT_LEFT) @@ -173,8 +182,20 @@ swizzle(int reg, uint32_t x, uint32_t y, uint32_t z, uint32_t w) */ extern void i915_translate_fragment_program(struct i915_context *i915, struct i915_fragment_shader *fs); +struct corm_compile_opts { + bool deferred_const; + bool seq_sne_opt; +}; + +extern void i915_translate_fragment_program_nir(struct i915_context *i915, + struct i915_fragment_shader *ifs, + struct nir_shader *s, + const struct corm_compile_opts *opts); +extern void i915_use_passthrough_shader(struct i915_fragment_shader *fs); +extern void i915_program_error(struct i915_fp_compile *p, const char *msg, ...); extern uint32_t i915_get_temp(struct i915_fp_compile *p); +extern void i915_release_temp(struct i915_fp_compile *p, int reg); extern uint32_t i915_get_utemp(struct i915_fp_compile *p); extern void i915_release_utemps(struct i915_fp_compile *p); @@ -191,6 +212,8 @@ extern uint32_t i915_emit_decl(struct i915_fp_compile *p, uint32_t type, uint32_t nr, uint32_t d0_flags); extern uint32_t i915_emit_const1f(struct i915_fp_compile *p, float c0); +extern 
uint32_t i915_emit_const1f_prefer(struct i915_fp_compile *p, float c0, + int preferred_reg); extern uint32_t i915_emit_const2f(struct i915_fp_compile *p, float c0, float c1); diff --git a/src/gallium/drivers/i915/i915_fpc_emit.c b/src/gallium/drivers/i915/i915_fpc_emit.c index 603c79e089f..aeace4396ca 100644 --- a/src/gallium/drivers/i915/i915_fpc_emit.c +++ b/src/gallium/drivers/i915/i915_fpc_emit.c @@ -25,11 +25,45 @@ * **************************************************************************/ +#include + +#include "util/ralloc.h" #include "util/u_math.h" +#include "util/u_memory.h" #include "i915_context.h" #include "i915_fpc.h" #include "i915_reg.h" +void +i915_program_error(struct i915_fp_compile *p, const char *msg, ...) +{ + va_list args; + va_start(args, msg); + ralloc_vasprintf_append(&p->error, msg, args); + va_end(args); +} + +static const unsigned passthrough_program[] = { + _3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1), + (A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL | + (REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)), + ((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) | + (SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) | + (SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) | + (SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)), + 0}; + +void +i915_use_passthrough_shader(struct i915_fragment_shader *fs) +{ + fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program)); + if (fs->program) { + memcpy(fs->program, passthrough_program, sizeof(passthrough_program)); + fs->program_len = ARRAY_SIZE(passthrough_program); + } + fs->num_constants = 0; +} + uint32_t i915_get_temp(struct i915_fp_compile *p) { @@ -43,7 +77,7 @@ i915_get_temp(struct i915_fp_compile *p) return bit - 1; } -static void +void i915_release_temp(struct i915_fp_compile *p, int reg) { p->temp_flag &= ~(1 << reg); @@ -179,8 +213,6 @@ i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask, { const uint32_t k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); - int temp = -1; - 
uint32_t coord_used = 0xf << UREG_CHANNEL_X_SHIFT; if (coord_mask & TGSI_WRITEMASK_Y) coord_used |= 0xf << UREG_CHANNEL_Y_SHIFT; @@ -191,13 +223,10 @@ i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask, if ((coord & coord_used) != (k & coord_used) || GET_UREG_TYPE(coord) == REG_TYPE_CONST) { - /* texcoord is swizzled or negated. Need to allocate a new temporary - * register (a utemp / unpreserved temp) won't do. + /* texcoord is swizzled or negated. Need a temporary to hold it. + * Use a utemp so it doesn't create a tex indirect phase boundary. */ - uint32_t tempReg; - - temp = i915_get_temp(p); /* get temp reg index */ - tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */ + uint32_t tempReg = i915_get_utemp(p); i915_emit_arith(p, A0_MOV, tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */ @@ -227,11 +256,21 @@ i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask, p->nr_tex_indirect++; /* Reading from an r# register whose contents depend on output of the - * current phase defines a phase boundary. + * current phase defines a phase boundary. Prefer just bumping the + * phase count (free), but if we'd exceed the HW limit, copy to a + * utemp instead (costs 1 ALU instruction). 
*/ if (GET_UREG_TYPE(coord) == REG_TYPE_R && - p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect) - p->nr_tex_indirect++; + p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect) { + if (p->nr_tex_indirect + 1 < I915_MAX_TEX_INDIRECT) { + p->nr_tex_indirect++; + } else { + uint32_t tmp = i915_get_utemp(p); + i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, + coord, 0, 0); + coord = tmp; + } + } if (p->csr < p->program + I915_PROGRAM_SIZE) { *(p->csr++) = (opcode | T0_DEST(dest) | T0_SAMPLER(sampler)); @@ -246,40 +285,75 @@ i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask, p->nr_tex_insn++; } - if (temp >= 0) - i915_release_temp(p, temp); - return dest; } +static uint32_t +i915_try_const1f_in_reg(struct i915_fp_compile *p, float c0, unsigned reg) +{ + struct i915_fragment_shader *ifs = p->shader; + + for (unsigned idx = 0; idx < 4; idx++) { + if (ifs->constant_flags[reg] & I915_CONSTFLAG_USER_CH(idx)) + continue; + if (!(ifs->constant_flags[reg] & I915_CONSTFLAG_IMM(idx)) || + ifs->constants[reg][idx] == c0) { + ifs->constants[reg][idx] = c0; + ifs->constant_flags[reg] |= I915_CONSTFLAG_IMM(idx); + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; + return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); + } + } + return UREG_BAD; +} + +static uint32_t +i915_try_emit_const1f(struct i915_fp_compile *p, float c0, int preferred_reg) +{ + if (preferred_reg >= 0 && preferred_reg < I915_MAX_CONSTANT) { /* out-of-range hints are ignored and fall through to the full scan */ + uint32_t r = i915_try_const1f_in_reg(p, c0, preferred_reg); + if (r != UREG_BAD) + return r; + } + + for (unsigned reg = 0; reg < I915_MAX_CONSTANT; reg++) { + uint32_t r = i915_try_const1f_in_reg(p, c0, reg); + if (r != UREG_BAD) + return r; + } + + i915_program_error(p, "i915_emit_const1f: out of constants"); + return 0; +} + uint32_t i915_emit_const1f(struct i915_fp_compile *p, float c0) { - struct i915_fragment_shader *ifs = p->shader; - unsigned reg, idx; - if (c0 == 0.0) return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, 
ZERO); if (c0 == 1.0) return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); + if (c0 == -1.0) + return negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE), + 1, 1, 1, 1); - for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { - if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER) - continue; - for (idx = 0; idx < 4; idx++) { - if (!(ifs->constant_flags[reg] & (1 << idx)) || - ifs->constants[reg][idx] == c0) { - ifs->constants[reg][idx] = c0; - ifs->constant_flags[reg] |= 1 << idx; - if (reg + 1 > ifs->num_constants) - ifs->num_constants = reg + 1; - return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); - } - } - } + return i915_try_emit_const1f(p, c0, -1); +} - i915_program_error(p, "i915_emit_const1f: out of constants"); - return 0; +uint32_t +i915_emit_const1f_prefer(struct i915_fp_compile *p, float c0, + int preferred_reg) +{ + if (c0 == 0.0) + return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); + if (c0 == 1.0) + return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); + if (c0 == -1.0) + return negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE), + 1, 1, 1, 1); + + return i915_try_emit_const1f(p, c0, preferred_reg); } uint32_t @@ -301,14 +375,15 @@ i915_emit_const2f(struct i915_fp_compile *p, float c0, float c1) // XXX emit swizzle here for 0, 1, -1 and any combination thereof // we can use swizzle + neg for that for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { - if (ifs->constant_flags[reg] == 0xf || - ifs->constant_flags[reg] == I915_CONSTFLAG_USER) + uint8_t occupied = (ifs->constant_flags[reg] & 0xf) | + (ifs->constant_flags[reg] >> 4); + if (occupied == 0xf) continue; for (idx = 0; idx < 3; idx++) { - if (!(ifs->constant_flags[reg] & (3 << idx))) { + if (!(occupied & (3 << idx))) { ifs->constants[reg][idx + 0] = c0; ifs->constants[reg][idx + 1] = c1; - ifs->constant_flags[reg] |= 3 << idx; + ifs->constant_flags[reg] |= (3 << idx); /* immediate bits */ if (reg + 1 > ifs->num_constants) ifs->num_constants = reg + 1; return 
swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE); @@ -330,9 +405,9 @@ i915_emit_const4f(struct i915_fp_compile *p, float c0, float c1, float c2, // XXX emit swizzle here for 0, 1, -1 and any combination thereof // we can use swizzle + neg for that for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { - if (ifs->constant_flags[reg] == 0xf && ifs->constants[reg][0] == c0 && - ifs->constants[reg][1] == c1 && ifs->constants[reg][2] == c2 && - ifs->constants[reg][3] == c3) { + if ((ifs->constant_flags[reg] & 0x0f) == 0x0f && + ifs->constants[reg][0] == c0 && ifs->constants[reg][1] == c1 && + ifs->constants[reg][2] == c2 && ifs->constants[reg][3] == c3) { return UREG(REG_TYPE_CONST, reg); } else if (ifs->constant_flags[reg] == 0) { @@ -340,7 +415,7 @@ i915_emit_const4f(struct i915_fp_compile *p, float c0, float c1, float c2, ifs->constants[reg][1] = c1; ifs->constants[reg][2] = c2; ifs->constants[reg][3] = c3; - ifs->constant_flags[reg] = 0xf; + ifs->constant_flags[reg] = 0x0f; if (reg + 1 > ifs->num_constants) ifs->num_constants = reg + 1; return UREG(REG_TYPE_CONST, reg); diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index ba6f6172e85..9277e55e9e3 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -54,55 +54,9 @@ * Simple pass-through fragment shader to use when we don't have * a real shader (or it fails to compile for some reason). 
*/ -static unsigned passthrough_program[] = { - _3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1), - /* move to output color: - */ - (A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL | - (REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)), - ((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) | - (SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) | - (SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) | - (SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)), - 0}; - /** * component-wise negation of ureg */ -static inline int -negate(int reg, int x, int y, int z, int w) -{ - /* Another neat thing about the UREG representation */ - return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | - ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | - ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | - ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); -} - -/** - * In the event of a translation failure, we'll generate a simple color - * pass-through program. - */ -static void -i915_use_passthrough_shader(struct i915_fragment_shader *fs) -{ - fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program)); - if (fs->program) { - memcpy(fs->program, passthrough_program, sizeof(passthrough_program)); - fs->program_len = ARRAY_SIZE(passthrough_program); - } - fs->num_constants = 0; -} - -void -i915_program_error(struct i915_fp_compile *p, const char *msg, ...) 
-{ - va_list args; - va_start(args, msg); - ralloc_vasprintf_append(&p->error, msg, args); - va_end(args); -} - static uint32_t get_mapping(struct i915_fragment_shader *fs, enum tgsi_semantic semantic, int index) @@ -1023,6 +977,10 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) assert(!ifs->program); ifs->program_len = decl_size + program_size; + ifs->nr_alu_insn = p->nr_alu_insn; + ifs->nr_tex_insn = p->nr_tex_insn; + ifs->nr_tex_indirect = p->nr_tex_indirect; + ifs->nr_temps = util_bitcount(p->temp_flag); ifs->program = (uint32_t *)MALLOC(ifs->program_len * sizeof(uint32_t)); memcpy(ifs->program, p->declarations, decl_size * sizeof(uint32_t)); memcpy(&ifs->program[decl_size], p->program, @@ -1034,10 +992,9 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) "%s shader: %d instructions, %d alu, %d tex, %d tex_indirect, " "%d temps, %d const", _mesa_shader_stage_to_abbrev(MESA_SHADER_FRAGMENT), - p->nr_alu_insn + p->nr_tex_insn, - p->nr_alu_insn, p->nr_tex_insn, p->nr_tex_indirect, - p->shader->info.file_max[TGSI_FILE_TEMPORARY] + 1, - ifs->num_constants); + ifs->nr_alu_insn + ifs->nr_tex_insn, + ifs->nr_alu_insn, ifs->nr_tex_insn, ifs->nr_tex_indirect, + ifs->nr_temps, ifs->num_constants); } } diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index f3561b143e8..8a92d6d0a7b 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -332,28 +332,33 @@ emit_constants(struct i915_context *i915) OUT_BATCH((1 << nr) - 1); for (i = 0; i < nr; i++) { - const uint32_t *c; - if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { - /* grab user-defined constant */ - c = (uint32_t *)i915_buffer(i915->constants[MESA_SHADER_FRAGMENT]) - ->data; + uint8_t flags = i915->fs->constant_flags[i]; + uint8_t user_mask = flags >> 4; + + if (!user_mask) { + const uint32_t *c = (uint32_t *)i915->fs->constants[i]; + OUT_BATCH(c[0]); + 
OUT_BATCH(c[1]); + OUT_BATCH(c[2]); + OUT_BATCH(c[3]); + } else if (user_mask == 0xf) { + const uint32_t *c = + (uint32_t *)i915_buffer(i915->constants[MESA_SHADER_FRAGMENT]) + ->data; c += 4 * i; + OUT_BATCH(c[0]); + OUT_BATCH(c[1]); + OUT_BATCH(c[2]); + OUT_BATCH(c[3]); } else { - /* emit program constant */ - c = (uint32_t *)i915->fs->constants[i]; + const uint32_t *user = + (uint32_t *)i915_buffer(i915->constants[MESA_SHADER_FRAGMENT]) + ->data; + user += 4 * i; + const uint32_t *imm = (uint32_t *)i915->fs->constants[i]; + for (unsigned ch = 0; ch < 4; ch++) + OUT_BATCH((user_mask & (1 << ch)) ? user[ch] : imm[ch]); } -#if 0 /* debug */ - { - float *f = (float *) c; - printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], - (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER - ? "user" : "immediate")); - } -#endif - OUT_BATCH(*c++); - OUT_BATCH(*c++); - OUT_BATCH(*c++); - OUT_BATCH(*c++); } } }