i915: refactor constant and compiler infrastructure for NIR backend

Rework the constant register encoding to track per-channel ownership
(I915_CONSTFLAG_IMM / I915_CONSTFLAG_USER_CH) instead of whole-register
flags, allowing compiler immediates and user UBO values to share a
constant register on different channels.  Update emit_constants() to
handle per-channel source selection at upload time.

Add i915_emit_const1f_prefer() for packing scalar constants into a
preferred register, reducing dual-constant conflicts.

Move i915_program_error(), i915_use_passthrough_shader(), and negate()
from i915_fpc_translate.c to shared locations (i915_fpc_emit.c /
i915_fpc.h) so the NIR backend can use them.

Fix i915_emit_texld() to use a utemp instead of a temp register for
texcoord swizzle copies, avoiding unnecessary tex indirect phase
boundaries.  Add a fallback path that copies to a utemp when bumping
the phase count would exceed the hardware limit.

Add nr_alu_insn, nr_tex_insn, nr_tex_indirect, nr_temps, writes_z,
and input semantic tracking to i915_fragment_shader for use by the
NIR backend's multi-variant comparison framework.

Assisted-by: Claude
This commit is contained in:
Adam Jackson 2026-05-06 12:43:25 -04:00
parent badd52c7d5
commit 4087e3b7ef
5 changed files with 190 additions and 116 deletions

View file

@ -88,8 +88,15 @@ struct i915_winsys_batchbuffer;
#define I915_MAX_CONSTANT 32
/** See constant_flags[] below */
#define I915_CONSTFLAG_USER 0x1f
/**
* Per-channel flags for constant_flags[].
* Bits 0-3: channel has a compiler immediate.
* Bits 4-7: channel has a user (UBO) value uploaded at draw time.
* A channel is available when neither bit is set.
*/
#define I915_CONSTFLAG_IMM(ch) (1 << (ch))
#define I915_CONSTFLAG_USER_CH(ch) (1 << ((ch) + 4))
#define I915_CONSTFLAG_USER 0xf0
/**
* Subclass of pipe_shader_state
@ -103,6 +110,10 @@ struct i915_fragment_shader {
uint32_t *program;
uint32_t program_len;
uint32_t nr_alu_insn;
uint32_t nr_tex_insn;
uint32_t nr_tex_indirect;
uint32_t nr_temps;
/**
* constants introduced during translation.
@ -134,12 +145,15 @@ struct i915_fragment_shader {
} texcoords[I915_TEX_UNITS];
bool reads_pntc;
bool writes_z;
unsigned num_inputs;
uint8_t input_semantic_name[PIPE_MAX_SHADER_INPUTS];
uint8_t input_semantic_index[PIPE_MAX_SHADER_INPUTS];
/* Set if the shader is an internal (blit, etc.) shader that shouldn't debug
* log by default. */
bool internal;
char *error; /* Any error message from compiling this shader (or NULL) */
char *error;
};
struct i915_cache_context;

View file

@ -136,6 +136,15 @@ swizzle(int reg, uint32_t x, uint32_t y, uint32_t z, uint32_t w)
CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3));
}
/**
 * Component-wise negation of a ureg.
 *
 * Each of \p x, \p y, \p z, \p w selects whether the negate bit of the
 * corresponding channel is toggled.  The bits are XORed into \p reg, so
 * negating a channel that is already negated clears its negate bit again.
 *
 * Only bit 0 of each selector is used.  Without the mask, a selector other
 * than 0 or 1 would be shifted into neighbouring ureg fields and corrupt
 * them; the masking matches the implementation this helper was moved from.
 *
 * \return \p reg with the selected per-channel negate bits flipped.
 */
static inline int
negate(int reg, int x, int y, int z, int w)
{
   return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
                 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
                 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
                 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
}
#define A0_DEST(reg) (((reg)&UREG_TYPE_NR_MASK) >> UREG_A0_DEST_SHIFT_LEFT)
#define D0_DEST(reg) (((reg)&UREG_TYPE_NR_MASK) >> UREG_A0_DEST_SHIFT_LEFT)
#define T0_DEST(reg) (((reg)&UREG_TYPE_NR_MASK) >> UREG_A0_DEST_SHIFT_LEFT)
@ -173,8 +182,20 @@ swizzle(int reg, uint32_t x, uint32_t y, uint32_t z, uint32_t w)
*/
extern void i915_translate_fragment_program(struct i915_context *i915,
struct i915_fragment_shader *fs);
/**
 * Options passed (by const pointer) to i915_translate_fragment_program_nir().
 *
 * NOTE(review): the exact meaning of each flag is defined by the NIR backend,
 * which is not visible from this header - confirm against its implementation.
 */
struct corm_compile_opts {
/* presumably enables deferred constant handling - TODO confirm */
bool deferred_const;
/* presumably enables an optimization for SEQ/SNE comparisons - TODO confirm */
bool seq_sne_opt;
};
extern void i915_translate_fragment_program_nir(struct i915_context *i915,
struct i915_fragment_shader *ifs,
struct nir_shader *s,
const struct corm_compile_opts *opts);
extern void i915_use_passthrough_shader(struct i915_fragment_shader *fs);
extern void i915_program_error(struct i915_fp_compile *p, const char *msg, ...);
extern uint32_t i915_get_temp(struct i915_fp_compile *p);
extern void i915_release_temp(struct i915_fp_compile *p, int reg);
extern uint32_t i915_get_utemp(struct i915_fp_compile *p);
extern void i915_release_utemps(struct i915_fp_compile *p);
@ -191,6 +212,8 @@ extern uint32_t i915_emit_decl(struct i915_fp_compile *p, uint32_t type,
uint32_t nr, uint32_t d0_flags);
extern uint32_t i915_emit_const1f(struct i915_fp_compile *p, float c0);
extern uint32_t i915_emit_const1f_prefer(struct i915_fp_compile *p, float c0,
int preferred_reg);
extern uint32_t i915_emit_const2f(struct i915_fp_compile *p, float c0,
float c1);

View file

@ -25,11 +25,45 @@
*
**************************************************************************/
#include <stdarg.h>
#include "util/ralloc.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "i915_context.h"
#include "i915_fpc.h"
#include "i915_reg.h"
void
i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
{
va_list args;
va_start(args, msg);
ralloc_vasprintf_append(&p->error, msg, args);
va_end(args);
}
/**
 * Simple pass-through fragment shader to use when we don't have
 * a real shader (or it fails to compile for some reason).
 *
 * Layout: one packet header dword followed by the three dwords of a single
 * instruction.
 */
static const unsigned passthrough_program[] = {
_3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1),
/* move to output color: MOV into REG_TYPE_OC from R0, all channels written */
(A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL |
(REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)),
/* source swizzle selects (ONE, ZERO, ZERO, ONE) rather than R0's contents */
((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) |
(SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) |
(SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) |
(SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)),
0};
/**
 * In the event of a translation failure, install a simple color
 * pass-through program on \p fs.
 *
 * A heap copy of passthrough_program becomes fs->program.  If the allocation
 * fails, fs->program is left NULL and fs->program_len is not touched.  In
 * both cases the shader ends up with no constants.
 */
void
i915_use_passthrough_shader(struct i915_fragment_shader *fs)
{
   uint32_t *copy = (uint32_t *)MALLOC(sizeof(passthrough_program));

   fs->program = copy;
   fs->num_constants = 0;

   if (!copy)
      return;

   memcpy(copy, passthrough_program, sizeof(passthrough_program));
   fs->program_len = ARRAY_SIZE(passthrough_program);
}
uint32_t
i915_get_temp(struct i915_fp_compile *p)
{
@ -43,7 +77,7 @@ i915_get_temp(struct i915_fp_compile *p)
return bit - 1;
}
static void
void
i915_release_temp(struct i915_fp_compile *p, int reg)
{
p->temp_flag &= ~(1 << reg);
@ -179,8 +213,6 @@ i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask,
{
const uint32_t k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord));
int temp = -1;
uint32_t coord_used = 0xf << UREG_CHANNEL_X_SHIFT;
if (coord_mask & TGSI_WRITEMASK_Y)
coord_used |= 0xf << UREG_CHANNEL_Y_SHIFT;
@ -191,13 +223,10 @@ i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask,
if ((coord & coord_used) != (k & coord_used) ||
GET_UREG_TYPE(coord) == REG_TYPE_CONST) {
/* texcoord is swizzled or negated. Need to allocate a new temporary
* register (a utemp / unpreserved temp) won't do.
/* texcoord is swizzled or negated. Need a temporary to hold it.
* Use a utemp so it doesn't create a tex indirect phase boundary.
*/
uint32_t tempReg;
temp = i915_get_temp(p); /* get temp reg index */
tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */
uint32_t tempReg = i915_get_utemp(p);
i915_emit_arith(p, A0_MOV, tempReg,
A0_DEST_CHANNEL_ALL, /* dest reg, writemask */
@ -227,11 +256,21 @@ i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask,
p->nr_tex_indirect++;
/* Reading from an r# register whose contents depend on output of the
* current phase defines a phase boundary.
* current phase defines a phase boundary. Prefer just bumping the
* phase count (free), but if we'd exceed the HW limit, copy to a
* utemp instead (costs 1 ALU instruction).
*/
if (GET_UREG_TYPE(coord) == REG_TYPE_R &&
p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect)
p->nr_tex_indirect++;
p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect) {
if (p->nr_tex_indirect + 1 < I915_MAX_TEX_INDIRECT) {
p->nr_tex_indirect++;
} else {
uint32_t tmp = i915_get_utemp(p);
i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
coord, 0, 0);
coord = tmp;
}
}
if (p->csr < p->program + I915_PROGRAM_SIZE) {
*(p->csr++) = (opcode | T0_DEST(dest) | T0_SAMPLER(sampler));
@ -246,40 +285,75 @@ i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask,
p->nr_tex_insn++;
}
if (temp >= 0)
i915_release_temp(p, temp);
return dest;
}
/**
 * Try to place the scalar immediate \p c0 in constant register \p reg.
 *
 * Channels are examined in order x, y, z, w.  A channel owned by a user
 * (UBO) value is never used.  The first channel that either holds no
 * immediate yet or already holds an immediate equal to \p c0 is claimed:
 * the value is stored, the channel is flagged as an immediate, and
 * num_constants is raised to cover \p reg if necessary.
 *
 * \return a ureg that reads the chosen channel in .x (with ZERO, ZERO, ONE
 *         in the remaining slots), or UREG_BAD if no channel of \p reg
 *         can hold the value.
 */
static uint32_t
i915_try_const1f_in_reg(struct i915_fp_compile *p, float c0, unsigned reg)
{
   struct i915_fragment_shader *ifs = p->shader;

   for (unsigned ch = 0; ch < 4; ch++) {
      const unsigned flags = ifs->constant_flags[reg];

      /* Channel is filled from the user constant buffer at draw time. */
      if (flags & I915_CONSTFLAG_USER_CH(ch))
         continue;

      /* Channel already carries a different immediate. */
      if ((flags & I915_CONSTFLAG_IMM(ch)) && ifs->constants[reg][ch] != c0)
         continue;

      ifs->constants[reg][ch] = c0;
      ifs->constant_flags[reg] |= I915_CONSTFLAG_IMM(ch);
      if (ifs->num_constants < reg + 1)
         ifs->num_constants = reg + 1;

      return swizzle(UREG(REG_TYPE_CONST, reg), ch, ZERO, ZERO, ONE);
   }

   return UREG_BAD;
}
/**
 * Allocate (or reuse) a constant-register channel for the scalar \p c0.
 *
 * \param p              compile state
 * \param c0             immediate value to store
 * \param preferred_reg  constant register to try first; a negative value
 *                       means "no preference"
 *
 * The preferred register is tried first, then every constant register in
 * ascending order.  If nothing can hold the value, a program error is
 * recorded and 0 is returned.
 */
static uint32_t
i915_try_emit_const1f(struct i915_fp_compile *p, float c0, int preferred_reg)
{
   /* Honor the hint only when it names an existing constant register.  The
    * scan below treats I915_MAX_CONSTANT as the register count, so a larger
    * index would read and write past the per-register constant arrays.
    */
   if (preferred_reg >= 0 && preferred_reg < I915_MAX_CONSTANT) {
      uint32_t r = i915_try_const1f_in_reg(p, c0, preferred_reg);
      if (r != UREG_BAD)
         return r;
   }

   for (unsigned reg = 0; reg < I915_MAX_CONSTANT; reg++) {
      uint32_t r = i915_try_const1f_in_reg(p, c0, reg);
      if (r != UREG_BAD)
         return r;
   }

   i915_program_error(p, "i915_emit_const1f: out of constants");
   return 0;
}
uint32_t
i915_emit_const1f(struct i915_fp_compile *p, float c0)
{
struct i915_fragment_shader *ifs = p->shader;
unsigned reg, idx;
if (c0 == 0.0)
return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
if (c0 == 1.0)
return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);
if (c0 == -1.0)
return negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE),
1, 1, 1, 1);
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
continue;
for (idx = 0; idx < 4; idx++) {
if (!(ifs->constant_flags[reg] & (1 << idx)) ||
ifs->constants[reg][idx] == c0) {
ifs->constants[reg][idx] = c0;
ifs->constant_flags[reg] |= 1 << idx;
if (reg + 1 > ifs->num_constants)
ifs->num_constants = reg + 1;
return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);
}
}
}
return i915_try_emit_const1f(p, c0, -1);
}
i915_program_error(p, "i915_emit_const1f: out of constants");
return 0;
/**
 * Emit a scalar constant, trying to pack it into \p preferred_reg.
 *
 * The values 0, 1 and -1 need no constant storage: they are expressed as a
 * ZERO / ONE swizzle of R0 (with all channels negated for -1).  Any other
 * value is handed to i915_try_emit_const1f() together with the register
 * hint.
 *
 * \return a ureg that yields \p c0.
 */
uint32_t
i915_emit_const1f_prefer(struct i915_fp_compile *p, float c0,
                         int preferred_reg)
{
   /* Everything except 0, 1 and -1 (NaN included) needs a real constant. */
   if (c0 != 0.0 && c0 != 1.0 && c0 != -1.0)
      return i915_try_emit_const1f(p, c0, preferred_reg);

   if (c0 == 0.0)
      return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);

   if (c0 == 1.0)
      return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);

   /* Only -1.0 is left: all-ones with every channel negated. */
   return negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE),
                 1, 1, 1, 1);
}
uint32_t
@ -301,14 +375,15 @@ i915_emit_const2f(struct i915_fp_compile *p, float c0, float c1)
// XXX emit swizzle here for 0, 1, -1 and any combination thereof
// we can use swizzle + neg for that
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
if (ifs->constant_flags[reg] == 0xf ||
ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
uint8_t occupied = (ifs->constant_flags[reg] & 0xf) |
(ifs->constant_flags[reg] >> 4);
if (occupied == 0xf)
continue;
for (idx = 0; idx < 3; idx++) {
if (!(ifs->constant_flags[reg] & (3 << idx))) {
if (!(occupied & (3 << idx))) {
ifs->constants[reg][idx + 0] = c0;
ifs->constants[reg][idx + 1] = c1;
ifs->constant_flags[reg] |= 3 << idx;
ifs->constant_flags[reg] |= (3 << idx); /* immediate bits */
if (reg + 1 > ifs->num_constants)
ifs->num_constants = reg + 1;
return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE);
@ -330,9 +405,9 @@ i915_emit_const4f(struct i915_fp_compile *p, float c0, float c1, float c2,
// XXX emit swizzle here for 0, 1, -1 and any combination thereof
// we can use swizzle + neg for that
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
if (ifs->constant_flags[reg] == 0xf && ifs->constants[reg][0] == c0 &&
ifs->constants[reg][1] == c1 && ifs->constants[reg][2] == c2 &&
ifs->constants[reg][3] == c3) {
if ((ifs->constant_flags[reg] & 0x0f) == 0x0f &&
ifs->constants[reg][0] == c0 && ifs->constants[reg][1] == c1 &&
ifs->constants[reg][2] == c2 && ifs->constants[reg][3] == c3) {
return UREG(REG_TYPE_CONST, reg);
} else if (ifs->constant_flags[reg] == 0) {
@ -340,7 +415,7 @@ i915_emit_const4f(struct i915_fp_compile *p, float c0, float c1, float c2,
ifs->constants[reg][1] = c1;
ifs->constants[reg][2] = c2;
ifs->constants[reg][3] = c3;
ifs->constant_flags[reg] = 0xf;
ifs->constant_flags[reg] = 0x0f;
if (reg + 1 > ifs->num_constants)
ifs->num_constants = reg + 1;
return UREG(REG_TYPE_CONST, reg);

View file

@ -54,55 +54,9 @@
* Simple pass-through fragment shader to use when we don't have
* a real shader (or it fails to compile for some reason).
*/
static unsigned passthrough_program[] = {
_3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1),
/* move to output color:
*/
(A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL |
(REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)),
((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) |
(SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) |
(SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) |
(SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)),
0};
/**
* component-wise negation of ureg
*/
static inline int
negate(int reg, int x, int y, int z, int w)
{
/* Another neat thing about the UREG representation */
return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
}
/**
* In the event of a translation failure, we'll generate a simple color
* pass-through program.
*/
static void
i915_use_passthrough_shader(struct i915_fragment_shader *fs)
{
fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program));
if (fs->program) {
memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
fs->program_len = ARRAY_SIZE(passthrough_program);
}
fs->num_constants = 0;
}
void
i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
{
va_list args;
va_start(args, msg);
ralloc_vasprintf_append(&p->error, msg, args);
va_end(args);
}
static uint32_t
get_mapping(struct i915_fragment_shader *fs, enum tgsi_semantic semantic,
int index)
@ -1023,6 +977,10 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
assert(!ifs->program);
ifs->program_len = decl_size + program_size;
ifs->nr_alu_insn = p->nr_alu_insn;
ifs->nr_tex_insn = p->nr_tex_insn;
ifs->nr_tex_indirect = p->nr_tex_indirect;
ifs->nr_temps = util_bitcount(p->temp_flag);
ifs->program = (uint32_t *)MALLOC(ifs->program_len * sizeof(uint32_t));
memcpy(ifs->program, p->declarations, decl_size * sizeof(uint32_t));
memcpy(&ifs->program[decl_size], p->program,
@ -1034,10 +992,9 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
"%s shader: %d instructions, %d alu, %d tex, %d tex_indirect, "
"%d temps, %d const",
_mesa_shader_stage_to_abbrev(MESA_SHADER_FRAGMENT),
p->nr_alu_insn + p->nr_tex_insn,
p->nr_alu_insn, p->nr_tex_insn, p->nr_tex_indirect,
p->shader->info.file_max[TGSI_FILE_TEMPORARY] + 1,
ifs->num_constants);
ifs->nr_alu_insn + ifs->nr_tex_insn,
ifs->nr_alu_insn, ifs->nr_tex_insn, ifs->nr_tex_indirect,
ifs->nr_temps, ifs->num_constants);
}
}

View file

@ -332,28 +332,33 @@ emit_constants(struct i915_context *i915)
OUT_BATCH((1 << nr) - 1);
for (i = 0; i < nr; i++) {
const uint32_t *c;
if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
/* grab user-defined constant */
c = (uint32_t *)i915_buffer(i915->constants[MESA_SHADER_FRAGMENT])
->data;
uint8_t flags = i915->fs->constant_flags[i];
uint8_t user_mask = flags >> 4;
if (!user_mask) {
const uint32_t *c = (uint32_t *)i915->fs->constants[i];
OUT_BATCH(c[0]);
OUT_BATCH(c[1]);
OUT_BATCH(c[2]);
OUT_BATCH(c[3]);
} else if (user_mask == 0xf) {
const uint32_t *c =
(uint32_t *)i915_buffer(i915->constants[MESA_SHADER_FRAGMENT])
->data;
c += 4 * i;
OUT_BATCH(c[0]);
OUT_BATCH(c[1]);
OUT_BATCH(c[2]);
OUT_BATCH(c[3]);
} else {
/* emit program constant */
c = (uint32_t *)i915->fs->constants[i];
const uint32_t *user =
(uint32_t *)i915_buffer(i915->constants[MESA_SHADER_FRAGMENT])
->data;
user += 4 * i;
const uint32_t *imm = (uint32_t *)i915->fs->constants[i];
for (unsigned ch = 0; ch < 4; ch++)
OUT_BATCH((user_mask & (1 << ch)) ? user[ch] : imm[ch]);
}
#if 0 /* debug */
{
float *f = (float *) c;
printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
(i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
? "user" : "immediate"));
}
#endif
OUT_BATCH(*c++);
OUT_BATCH(*c++);
OUT_BATCH(*c++);
OUT_BATCH(*c++);
}
}
}