i915/corm: add NIR fragment shader backend

Bare-minimum NIR-to-i915 fragment shader compiler with multi-variant
framework, lexicographic cost metric (tex_indirect > ALU > temps > consts),
and winner-tagged stats output.

Stats are emitted once per shader with [NIR] or [TGSI] tag indicating
which backend won.  The corm_compile_opts struct is available for
multi-variant compilation (currently empty).

Assisted-by: Claude

shader-db (I915_FS=nir): 48/403 compiled, 65 alu
shader-db (I915_FS=both): nir won 48 (26 identical, 16 tied, 6 better),
  236 TGSI, 119 neither
This commit is contained in:
Adam Jackson 2026-05-06 12:45:51 -04:00
parent 4087e3b7ef
commit 3d3b557780
4 changed files with 1130 additions and 27 deletions

View file

@ -0,0 +1,821 @@
/*
* Copyright 2025 Red Hat, Inc.
* SPDX-License-Identifier: MIT
*/
#include "compiler/nir/nir.h"
#include "tgsi/tgsi_from_mesa.h"
#include "util/log.h"
#include "util/ralloc.h"
#include "util/u_memory.h"
#include "i915_context.h"
#include "i915_debug.h"
#include "i915_debug_private.h"
#include "i915_fpc.h"
#include "i915_reg.h"
/*
 * Per-shader context for translating one NIR fragment shader into i915
 * hardware instructions.
 */
struct nir_to_i915 {
struct corm_compile_opts opts; /* variant-compile options for this run */
struct i915_fp_compile *p; /* shared low-level i915 program emitter */
struct i915_fragment_shader *ifs; /* the shader object being filled in */
uint32_t *ureg_map; /* SSA def index -> i915 ureg encoding */
unsigned ureg_map_size; /* entries in ureg_map (== impl->ssa_alloc) */
};
/* Record which i915 ureg encoding holds the value of SSA def "def". */
static void
set_ureg(struct nir_to_i915 *c, nir_def *def, uint32_t ureg)
{
   const unsigned idx = def->index;

   assert(idx < c->ureg_map_size);
   c->ureg_map[idx] = ureg;
}
/* Look up the ureg encoding previously recorded for a NIR source's def. */
static uint32_t
src_ureg(struct nir_to_i915 *c, nir_src *src)
{
   const unsigned idx = src->ssa->index;

   assert(idx < c->ureg_map_size);
   return c->ureg_map[idx];
}
/* Fetch an ALU source operand with its per-channel swizzle applied. */
static uint32_t
alu_src_ureg(struct nir_to_i915 *c, nir_alu_src *src)
{
   const uint8_t *sw = src->swizzle;

   return swizzle(src_ureg(c, &src->src), sw[0], sw[1], sw[2], sw[3]);
}
/* Build an A0 destination writemask covering a def's components (X..W). */
static uint32_t
def_mask(nir_def *def)
{
   static const uint32_t chan[4] = {
      A0_DEST_CHANNEL_X, A0_DEST_CHANNEL_Y,
      A0_DEST_CHANNEL_Z, A0_DEST_CHANNEL_W,
   };
   uint32_t mask = 0;

   for (unsigned i = 0; i < 4 && i < def->num_components; i++)
      mask |= chan[i];
   return mask;
}
/* Convert a TGSI-style 4-bit writemask to A0 destination channel bits. */
static uint32_t
writemask_to_mask(unsigned wm)
{
   static const uint32_t chan[4] = {
      A0_DEST_CHANNEL_X, A0_DEST_CHANNEL_Y,
      A0_DEST_CHANNEL_Z, A0_DEST_CHANNEL_W,
   };
   uint32_t mask = 0;

   for (unsigned i = 0; i < 4; i++) {
      if (wm & (1u << i))
         mask |= chan[i];
   }
   return mask;
}
/*
 * Find or allocate the hardware texcoord unit that carries a given
 * varying (semantic, index) pair.  Unused slots are marked with
 * semantic == -1 and claimed first-come-first-served.
 *
 * NOTE(review): if all I915_TEX_UNITS are occupied by other varyings this
 * silently falls back to unit 0 — verify callers can't exceed the unit
 * count, or report a program error here instead.
 */
static uint32_t
get_texcoord_mapping(struct i915_fragment_shader *fs,
unsigned semantic, int index)
{
for (int i = 0; i < I915_TEX_UNITS; i++) {
/* first free slot: claim it for this varying */
if (fs->texcoords[i].semantic == -1) {
fs->texcoords[i].semantic = semantic;
fs->texcoords[i].index = index;
return i;
}
/* already mapped to this unit */
if (fs->texcoords[i].semantic == (int)semantic &&
fs->texcoords[i].index == index)
return i;
}
return 0;
}
/*
 * Declare and return the ureg for a fragment shader input varying.
 * The gl_varying_slot is translated to a TGSI semantic, which selects
 * the interpolated hardware register (texcoord unit, diffuse, specular,
 * or fog) to read from.
 */
static uint32_t
emit_input(struct nir_to_i915 *c, unsigned location)
{
struct i915_fp_compile *p = c->p;
struct i915_fragment_shader *ifs = c->ifs;
unsigned sem_name, sem_index;
tgsi_get_gl_varying_semantic((gl_varying_slot)location, true,
&sem_name, &sem_index);
switch (sem_name) {
/* these all ride on interpolated texcoord units */
case TGSI_SEMANTIC_GENERIC:
case TGSI_SEMANTIC_TEXCOORD:
case TGSI_SEMANTIC_PCOORD:
case TGSI_SEMANTIC_POSITION: {
if (sem_name == TGSI_SEMANTIC_PCOORD)
ifs->reads_pntc = true;
int tc = get_texcoord_mapping(ifs, sem_name, sem_index);
return i915_emit_decl(p, REG_TYPE_T, T_TEX0 + tc, D0_CHANNEL_ALL);
}
case TGSI_SEMANTIC_COLOR:
if (sem_index == 0) {
return i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
} else {
/* secondary color only interpolates xyz; alpha reads as 1.0 */
return swizzle(
i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ),
X, Y, Z, ONE);
}
case TGSI_SEMANTIC_FOG:
/* fog factor lives in the w channel; replicate it */
return swizzle(
i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W),
W, W, W, W);
case TGSI_SEMANTIC_FACE: {
int tc = get_texcoord_mapping(ifs, sem_name, sem_index);
return i915_emit_decl(p, REG_TYPE_T, T_TEX0 + tc, D0_CHANNEL_X);
}
default:
i915_program_error(p, "Bad input location %d (semantic %d)",
location, sem_name);
return 0;
}
}
/*
 * Materialize a NIR constant in the constant file and record its ureg.
 * 1- and 2-component values go through the scalar/pair emit helpers;
 * 3- and 4-component values are emitted as a vec4 with unused channels
 * padded to zero.
 */
static void
emit_load_const(struct nir_to_i915 *c, nir_load_const_instr *load)
{
struct i915_fp_compile *p = c->p;
switch (load->def.num_components) {
case 1:
set_ureg(c, &load->def, i915_emit_const1f(p, load->value[0].f32));
break;
case 2:
set_ureg(c, &load->def,
i915_emit_const2f(p, load->value[0].f32,
load->value[1].f32));
break;
case 3:
case 4: {
/* the guards keep the vec3 case from reading value[3] */
float v[4] = {
load->value[0].f32,
load->def.num_components > 1 ? load->value[1].f32 : 0.0f,
load->def.num_components > 2 ? load->value[2].f32 : 0.0f,
load->def.num_components > 3 ? load->value[3].f32 : 0.0f,
};
set_ureg(c, &load->def, i915_emit_const4fv(p, v));
break;
}
default:
i915_program_error(p, "load_const with %d components",
load->def.num_components);
break;
}
}
/*
 * Translate one NIR ALU instruction into i915 arithmetic.  The result
 * normally goes into a freshly allocated R temp; pure moves/negations
 * are folded into the source encoding instead and cost no instruction.
 * Multi-instruction expansions use scratch utemps, released at the end.
 */
static void
emit_alu(struct nir_to_i915 *c, nir_alu_instr *alu)
{
struct i915_fp_compile *p = c->p;
nir_def *def = &alu->def;
uint32_t mask = def_mask(def);
uint32_t dest = UREG(REG_TYPE_R, i915_get_temp(p));
set_ureg(c, def, dest);
uint32_t src0 = 0, src1 = 0, src2 = 0;
if (nir_op_infos[alu->op].num_inputs >= 1)
src0 = alu_src_ureg(c, &alu->src[0]);
if (nir_op_infos[alu->op].num_inputs >= 2)
src1 = alu_src_ureg(c, &alu->src[1]);
if (nir_op_infos[alu->op].num_inputs >= 3)
src2 = alu_src_ureg(c, &alu->src[2]);
switch (alu->op) {
case nir_op_mov:
case nir_op_fcanonicalize:
case nir_op_fneg: {
/* free ops: fold into the source encoding and give the temp back */
i915_release_temp(p, GET_UREG_NR(dest));
set_ureg(c, def, alu->op == nir_op_fneg ? negate(src0, 1, 1, 1, 1)
: src0);
return;
}
case nir_op_fabs:
/* abs(x) = max(x, -x) */
i915_emit_arith(p, A0_MAX, dest, mask, 0,
src0, negate(src0, 1, 1, 1, 1), 0);
break;
case nir_op_fsat:
i915_emit_arith(p, A0_MOV, dest, mask, A0_DEST_SATURATE, src0, 0, 0);
break;
case nir_op_fadd:
i915_emit_arith(p, A0_ADD, dest, mask, 0, src0, src1, 0);
break;
case nir_op_fmul:
i915_emit_arith(p, A0_MUL, dest, mask, 0, src0, src1, 0);
break;
case nir_op_ffma:
i915_emit_arith(p, A0_MAD, dest, mask, 0, src0, src1, src2);
break;
/* integer min/max arrive here after int-to-float lowering */
case nir_op_fmin:
case nir_op_imin:
case nir_op_umin:
i915_emit_arith(p, A0_MIN, dest, mask, 0, src0, src1, 0);
break;
case nir_op_fmax:
case nir_op_imax:
case nir_op_umax:
i915_emit_arith(p, A0_MAX, dest, mask, 0, src0, src1, 0);
break;
case nir_op_ffloor:
i915_emit_arith(p, A0_FLR, dest, mask, 0, src0, 0, 0);
break;
case nir_op_ffract:
i915_emit_arith(p, A0_FRC, dest, mask, 0, src0, 0, 0);
break;
case nir_op_ftrunc:
i915_emit_arith(p, A0_TRC, dest, mask, 0, src0, 0, 0);
break;
case nir_op_fceil: {
/* ceil(x) = -floor(-x) */
uint32_t tmp = i915_get_utemp(p);
i915_emit_arith(p, A0_FLR, tmp, mask, 0,
negate(src0, 1, 1, 1, 1), 0, 0);
i915_emit_arith(p, A0_MOV, dest, mask, 0,
negate(tmp, 1, 1, 1, 1), 0, 0);
break;
}
/* scalar transcendentals operate on .x with the result replicated */
case nir_op_frcp:
i915_emit_arith(p, A0_RCP, dest, mask, 0,
swizzle(src0, X, X, X, X), 0, 0);
break;
case nir_op_frsq:
i915_emit_arith(p, A0_RSQ, dest, mask, 0,
swizzle(src0, X, X, X, X), 0, 0);
break;
case nir_op_fsqrt: {
/* sqrt(x) = x * rsq(x); note this yields 0*inf = NaN at x == 0 --
 * presumably acceptable for GL on this hardware (TODO confirm) */
uint32_t tmp = i915_get_utemp(p);
i915_emit_arith(p, A0_RSQ, tmp, A0_DEST_CHANNEL_X, 0,
swizzle(src0, X, X, X, X), 0, 0);
i915_emit_arith(p, A0_MUL, dest, mask, 0,
src0, swizzle(tmp, X, X, X, X), 0);
break;
}
case nir_op_fexp2:
i915_emit_arith(p, A0_EXP, dest, mask, 0,
swizzle(src0, X, X, X, X), 0, 0);
break;
case nir_op_flog2:
i915_emit_arith(p, A0_LOG, dest, mask, 0,
swizzle(src0, X, X, X, X), 0, 0);
break;
case nir_op_fdot2:
case nir_op_fdot2_replicated:
/* dot2 as DP3 with src0.zw forced to zero */
i915_emit_arith(p, A0_DP3, dest, mask, 0,
swizzle(src0, X, Y, ZERO, ZERO), src1, 0);
break;
case nir_op_fdot3:
case nir_op_fdot3_replicated:
i915_emit_arith(p, A0_DP3, dest, mask, 0, src0, src1, 0);
break;
case nir_op_fdot4:
case nir_op_fdot4_replicated:
i915_emit_arith(p, A0_DP4, dest, mask, 0, src0, src1, 0);
break;
case nir_op_slt:
i915_emit_arith(p, A0_SLT, dest, mask, 0, src0, src1, 0);
break;
case nir_op_sge:
i915_emit_arith(p, A0_SGE, dest, mask, 0, src0, src1, 0);
break;
case nir_op_seq: {
/* seq(a,b) = sge(a,b) * sge(b,a) */
uint32_t tmp = i915_get_utemp(p);
i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0,
src0, src1, 0);
i915_emit_arith(p, A0_SGE, dest, mask, 0, src1, src0, 0);
i915_emit_arith(p, A0_MUL, dest, mask, 0, dest, tmp, 0);
break;
}
case nir_op_sne: {
/* sne(a,b) = slt(a,b) + slt(b,a) */
uint32_t tmp = i915_get_utemp(p);
i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0,
src0, src1, 0);
i915_emit_arith(p, A0_SLT, dest, mask, 0, src1, src0, 0);
i915_emit_arith(p, A0_ADD, dest, mask, 0, dest, tmp, 0);
break;
}
case nir_op_fpow: {
/* pow(x,y) = exp2(y * log2(x)), all scalar on .x */
uint32_t tmp = i915_get_utemp(p);
i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_X, 0,
swizzle(src0, X, X, X, X), 0, 0);
i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
i915_emit_arith(p, A0_EXP, dest, mask, 0,
swizzle(tmp, X, X, X, X), 0, 0);
break;
}
case nir_op_bcsel:
/* A0_CMP selects its second operand when the first is >= 0 (see the
 * direct fcsel_ge mapping below).  The condition is a 0.0/1.0 float
 * after bool lowering, so negating it and swapping the arms gives
 * "cond ? src1 : src2". */
i915_emit_arith(p, A0_CMP, dest, mask, 0,
negate(src0, 1, 1, 1, 1), src2, src1);
break;
case nir_op_fcsel_ge:
/* src0 >= 0 ? src1 : src2 -- maps directly onto CMP */
i915_emit_arith(p, A0_CMP, dest, mask, 0, src0, src1, src2);
break;
case nir_op_fcsel_gt:
/* src0 > 0 ? src1 : src2: negate and swap arms, as for bcsel */
i915_emit_arith(p, A0_CMP, dest, mask, 0,
negate(src0, 1, 1, 1, 1), src2, src1);
break;
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4: {
/* scatter each scalar source into its destination channel */
unsigned n = nir_op_infos[alu->op].num_inputs;
static const uint32_t chan_mask[] = {
A0_DEST_CHANNEL_X, A0_DEST_CHANNEL_Y,
A0_DEST_CHANNEL_Z, A0_DEST_CHANNEL_W,
};
for (unsigned i = 0; i < n; i++) {
uint32_t s = alu_src_ureg(c, &alu->src[i]);
i915_emit_arith(p, A0_MOV, dest, chan_mask[i] & mask, 0,
swizzle(s, X, X, X, X), 0, 0);
}
break;
}
case nir_op_fsign: {
/* sign(x) = (0 < x) - (x < 0) */
uint32_t tmp = i915_get_utemp(p);
const uint32_t zero = swizzle(UREG(REG_TYPE_R, 0),
ZERO, ZERO, ZERO, ZERO);
i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0,
src0, zero, 0);
i915_emit_arith(p, A0_SLT, dest, mask, 0, zero, src0, 0);
i915_emit_arith(p, A0_ADD, dest, mask, 0,
dest, negate(tmp, 1, 1, 1, 1), 0);
break;
}
default:
i915_program_error(p, "unsupported NIR ALU op: %s",
nir_op_infos[alu->op].name);
break;
}
/* scratch utemps are per-instruction */
i915_release_utemps(p);
}
/*
 * Map a GLSL sampler dimensionality onto the i915 sampler declaration
 * type.  1D/RECT/EXTERNAL samplers are handled as 2D; anything the
 * hardware can't sample raises a program error and falls back to 2D.
 */
static uint32_t
translate_tex_type(struct i915_fp_compile *p, enum glsl_sampler_dim dim)
{
   switch (dim) {
   case GLSL_SAMPLER_DIM_3D:
      return D0_SAMPLE_TYPE_VOLUME;
   case GLSL_SAMPLER_DIM_CUBE:
      return D0_SAMPLE_TYPE_CUBE;
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      return D0_SAMPLE_TYPE_2D;
   default:
      i915_program_error(p, "unsupported sampler dim %d", dim);
      return D0_SAMPLE_TYPE_2D;
   }
}
/*
 * Compute the TGSI writemask of coordinate channels a texture operation
 * consumes: x/xy/xyz depending on sampler dimensionality, plus .z for a
 * shadow comparator and .w for a LOD bias or projector.
 */
static uint32_t
tex_coord_mask(nir_tex_instr *tex)
{
uint32_t mask = TGSI_WRITEMASK_X;
switch (tex->sampler_dim) {
case GLSL_SAMPLER_DIM_1D:
case GLSL_SAMPLER_DIM_2D:
case GLSL_SAMPLER_DIM_RECT:
case GLSL_SAMPLER_DIM_EXTERNAL:
mask = TGSI_WRITEMASK_XY;
break;
case GLSL_SAMPLER_DIM_3D:
case GLSL_SAMPLER_DIM_CUBE:
mask = TGSI_WRITEMASK_XYZ;
break;
default:
break;
}
/* shadow comparison value rides in .z (see emit_tex packing) */
if (tex->is_shadow)
mask |= TGSI_WRITEMASK_Z;
/* bias (txb) or a projector rides in .w */
if (tex->op == nir_texop_txb)
mask |= TGSI_WRITEMASK_W;
for (unsigned i = 0; i < tex->num_srcs; i++) {
if (tex->src[i].src_type == nir_tex_src_projector) {
mask |= TGSI_WRITEMASK_W;
break;
}
}
return mask;
}
/*
 * Translate a NIR texture instruction to i915 TEXLD/TEXLDB/TEXLDP.
 * The hardware has no separate operand slots for shadow comparators or
 * bias/projector values, so those are packed into the .z / .w channels
 * of a copy of the coordinate register first.
 */
static void
emit_tex(struct nir_to_i915 *c, nir_tex_instr *tex)
{
struct i915_fp_compile *p = c->p;
nir_def *def = &tex->def;
uint32_t dest = UREG(REG_TYPE_R, i915_get_temp(p));
set_ureg(c, def, dest);
uint32_t hw_tex = translate_tex_type(p, tex->sampler_dim);
uint32_t sampler = i915_emit_decl(p, REG_TYPE_S, tex->sampler_index, hw_tex);
uint32_t coord = 0;
uint32_t bias_or_proj = 0;
uint32_t shadow = 0;
bool has_bias = false, has_proj = false, has_shadow = false;
/* gather the operands we care about from the NIR source list */
for (unsigned i = 0; i < tex->num_srcs; i++) {
switch (tex->src[i].src_type) {
case nir_tex_src_coord:
coord = src_ureg(c, &tex->src[i].src);
break;
case nir_tex_src_bias:
bias_or_proj = src_ureg(c, &tex->src[i].src);
has_bias = true;
break;
case nir_tex_src_projector:
bias_or_proj = src_ureg(c, &tex->src[i].src);
has_proj = true;
break;
case nir_tex_src_comparator:
shadow = src_ureg(c, &tex->src[i].src);
has_shadow = true;
break;
default:
break;
}
}
/* 1D textures: set Y = X so LOD works correctly when sampled as 2D */
if (tex->sampler_dim == GLSL_SAMPLER_DIM_1D)
coord = swizzle(coord, X, X, Z, W);
/* pack bias/projector/shadow into a single coord register if needed */
if (has_bias || has_proj || has_shadow) {
/* NOTE(review): this temp comes from the persistent pool
 * (i915_get_temp, not a utemp) and is never explicitly released --
 * confirm it is reclaimed, or whether a utemp's lifetime suffices */
uint32_t tmp = UREG(REG_TYPE_R, i915_get_temp(p));
i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, coord, 0, 0);
if (has_shadow)
i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_Z, 0,
swizzle(shadow, X, X, X, X), 0, 0);
if (has_bias || has_proj)
i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_W, 0,
swizzle(bias_or_proj, X, X, X, X), 0, 0);
coord = tmp;
}
/* biased lookup beats projected; plain tex is the fallback */
uint32_t opcode;
if (tex->op == nir_texop_txb) {
opcode = T0_TEXLDB;
} else if (has_proj) {
opcode = T0_TEXLDP;
} else if (tex->op == nir_texop_tex) {
opcode = T0_TEXLD;
} else {
i915_program_error(p, "unsupported tex op %d", tex->op);
return;
}
i915_emit_texld(p, dest, A0_DEST_CHANNEL_ALL, sampler, coord, opcode,
tex_coord_mask(tex));
i915_release_utemps(p);
}
/*
 * Translate a NIR intrinsic.  Loads only record a ureg for later
 * consumers; stores and discards emit instructions directly.
 *
 * UBO offsets: by the time we get here nir_lower_int_to_float has run
 * over the shader, so the (constant) offset sources are float-typed and
 * must be read with nir_src_as_float, not nir_src_as_uint.
 */
static void
emit_intrinsic(struct nir_to_i915 *c, nir_intrinsic_instr *intr)
{
   struct i915_fp_compile *p = c->p;
   struct i915_fragment_shader *ifs = c->ifs;

   switch (intr->intrinsic) {
   case nir_intrinsic_load_input: {
      nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
      unsigned comp = nir_intrinsic_component(intr);
      uint32_t reg = emit_input(c, sem.location);
      /* Rotate so the first loaded component reads from .x; trailing
       * channels clamp at .w. */
      if (comp > 0) {
         reg = swizzle(reg, comp, MIN2(comp + 1, 3),
                       MIN2(comp + 2, 3), MIN2(comp + 3, 3));
      }
      set_ureg(c, &intr->def, reg);
      break;
   }
   case nir_intrinsic_store_output: {
      nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
      unsigned comp = nir_intrinsic_component(intr);
      uint32_t val = src_ureg(c, &intr->src[0]);
      uint32_t wm = nir_intrinsic_write_mask(intr);
      /* depth goes to OD; everything else to the output color */
      uint32_t dest;
      if (sem.location == FRAG_RESULT_DEPTH) {
         dest = UREG(REG_TYPE_OD, 0);
      } else {
         dest = UREG(REG_TYPE_OC, 0);
      }
      /* Shift source channels up onto the written components; the
       * unwritten low channels are don't-care. */
      if (comp > 0) {
         uint32_t s[4] = { X, Y, Z, W };
         for (int i = 3; i >= (int)comp; i--)
            s[i] = s[i - comp];
         for (unsigned i = 0; i < comp; i++)
            s[i] = ZERO;
         val = swizzle(val, s[0], s[1], s[2], s[3]);
         wm <<= comp;
      }
      i915_emit_arith(p, A0_MOV, dest, writemask_to_mask(wm), 0,
                      val, 0, 0);
      break;
   }
   case nir_intrinsic_load_ubo: {
      nir_src *offset_src = &intr->src[1];
      if (!nir_src_is_const(*offset_src)) {
         i915_program_error(p, "non-constant UBO offset");
         set_ureg(c, &intr->def,
                  swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO));
         break;
      }
      /* byte offset; float-typed after int-to-float lowering */
      unsigned byte_offset = (unsigned)nir_src_as_float(*offset_src);
      unsigned slot = byte_offset / 16;         /* vec4 slot */
      unsigned comp = (byte_offset % 16) / 4;   /* starting component */
      if (slot >= I915_MAX_CONSTANT) {
         i915_program_error(p, "UBO offset %d exceeds max constants", slot);
         set_ureg(c, &intr->def,
                  swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO));
         break;
      }
      /* The component swizzle below clamps at .w, so the access never
       * crosses into the next slot; one flag update covers it. */
      ifs->constant_flags[slot] |= I915_CONSTFLAG_USER;
      ifs->num_constants = MAX2(ifs->num_constants, slot + 1);
      uint32_t reg = UREG(REG_TYPE_CONST, slot);
      if (comp > 0) {
         uint32_t s[4];
         for (unsigned i = 0; i < 4; i++)
            s[i] = MIN2(comp + i, 3);
         reg = swizzle(reg, s[0], s[1], s[2], s[3]);
      }
      set_ureg(c, &intr->def, reg);
      break;
   }
   case nir_intrinsic_load_ubo_vec4: {
      nir_src *offset_src = &intr->src[1];
      if (!nir_src_is_const(*offset_src)) {
         i915_program_error(p, "non-constant UBO offset");
         set_ureg(c, &intr->def,
                  swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO));
         break;
      }
      /* offset in vec4 units; float-typed after int-to-float lowering */
      unsigned slot = nir_intrinsic_base(intr) +
                      (unsigned)nir_src_as_float(*offset_src);
      unsigned comp = nir_intrinsic_component(intr);
      if (slot >= I915_MAX_CONSTANT) {
         i915_program_error(p, "UBO slot %d exceeds max constants", slot);
         set_ureg(c, &intr->def,
                  swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO));
         break;
      }
      ifs->constant_flags[slot] |= I915_CONSTFLAG_USER;
      ifs->num_constants = MAX2(ifs->num_constants, slot + 1);
      uint32_t reg = UREG(REG_TYPE_CONST, slot);
      if (comp > 0) {
         uint32_t s[4];
         for (unsigned i = 0; i < 4; i++)
            s[i] = MIN2(comp + i, 3);
         reg = swizzle(reg, s[0], s[1], s[2], s[3]);
      }
      set_ureg(c, &intr->def, reg);
      break;
   }
   case nir_intrinsic_terminate:
   case nir_intrinsic_demote: {
      /* Unconditional kill: TEXKILL discards on negative coordinates,
       * so feed it -(1,1,1,1). */
      uint32_t tmp = i915_get_utemp(p);
      i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, 0,
                      negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE),
                             1, 1, 1, 1),
                      T0_TEXKILL, TGSI_WRITEMASK_X);
      i915_release_utemps(p);
      break;
   }
   case nir_intrinsic_terminate_if:
   case nir_intrinsic_demote_if: {
      /* Conditional kill: bools are 0.0/1.0 floats here, so -cond is
       * negative exactly when the condition is true. */
      uint32_t cond = src_ureg(c, &intr->src[0]);
      uint32_t tmp = i915_get_utemp(p);
      i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, 0,
                      negate(swizzle(cond, X, X, X, X), 1, 1, 1, 1),
                      T0_TEXKILL, TGSI_WRITEMASK_XYZW);
      i915_release_utemps(p);
      break;
   }
   case nir_intrinsic_ddx:
   case nir_intrinsic_ddy:
   case nir_intrinsic_ddx_coarse:
   case nir_intrinsic_ddy_coarse:
   case nir_intrinsic_ddx_fine:
   case nir_intrinsic_ddy_fine:
      /* i915 has no derivative instructions; approximate with zero */
      set_ureg(c, &intr->def,
               swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO));
      break;
   default:
      i915_program_error(p, "unsupported intrinsic: %s",
                         nir_intrinsic_infos[intr->intrinsic].name);
      break;
   }
}
/* Dispatch one NIR instruction to the matching emit helper. */
static void
emit_instr(struct nir_to_i915 *c, nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      emit_alu(c, nir_instr_as_alu(instr));
      return;
   case nir_instr_type_tex:
      emit_tex(c, nir_instr_as_tex(instr));
      return;
   case nir_instr_type_intrinsic:
      emit_intrinsic(c, nir_instr_as_intrinsic(instr));
      return;
   case nir_instr_type_load_const:
      emit_load_const(c, nir_instr_as_load_const(instr));
      return;
   case nir_instr_type_undef:
      /* undefined values may read as anything; use zero */
      set_ureg(c, &nir_instr_as_undef(instr)->def,
               swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO));
      return;
   case nir_instr_type_jump:
   case nir_instr_type_deref:
      /* nothing to emit */
      return;
   default:
      i915_program_error(c->p, "unsupported NIR instruction type %d",
                         instr->type);
      return;
   }
}
/*
 * The NIR store_output for gl_FragDepth lands in OD.x (component 0),
 * but the hardware fetches depth from OD.w; append a MOV copying x
 * into w.  No-op for shaders that don't write depth.
 */
static void
fixup_depth_write(struct nir_to_i915 *c, nir_shader *s)
{
if (!(s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)))
return;
/* NIR writes depth to OD.X (component 0); hardware reads from OD.W */
i915_emit_arith(c->p, A0_MOV,
UREG(REG_TYPE_OD, 0), A0_DEST_CHANNEL_W, 0,
swizzle(UREG(REG_TYPE_OD, 0), X, Y, Z, X),
0, 0);
}
/*
 * Compile a NIR fragment shader to i915 hardware instructions.
 *
 * Walks the (post-lowering, control-flow-free) shader linearly, emitting
 * through the shared i915_fp_compile state, then enforces the hardware
 * instruction/indirection limits and packages the declaration + program
 * dwords into ifs->program.  On error the partial program is optionally
 * dumped and replaced with the passthrough shader, and the error string
 * is handed off to ifs->error for the caller to inspect/free.
 */
void
i915_translate_fragment_program_nir(struct i915_context *i915,
struct i915_fragment_shader *ifs,
nir_shader *s,
const struct corm_compile_opts *opts)
{
nir_function_impl *impl = nir_shader_get_entrypoint(s);
bool debug = I915_DBG_ON(DBG_FS) &&
(!ifs->internal || NIR_DEBUG(PRINT_INTERNAL));
if (debug) {
mesa_logi("NIR fragment shader:");
nir_log_shaderi(s);
}
struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
p->shader = ifs;
/* growable ralloc string; empty means "no error so far" */
p->error = ralloc_strdup(NULL, "");
p->log_program_errors = !ifs->internal;
ifs->num_constants = 0;
memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
memset(p->register_phases, 0, sizeof(p->register_phases));
/* -1 marks a texcoord unit as unassigned (see get_texcoord_mapping) */
for (int i = 0; i < I915_TEX_UNITS; i++)
ifs->texcoords[i].semantic = -1;
/* the initial texture phase counts as one indirection */
p->nr_tex_indirect = 1;
p->nr_tex_insn = 0;
p->nr_alu_insn = 0;
p->nr_decl_insn = 0;
p->csr = p->program;
p->decl = p->declarations;
p->decl_s = 0;
p->decl_t = 0;
/* mark temps beyond the hardware limit as permanently taken */
p->temp_flag = ~0x0U << I915_MAX_TEMPORARY;
/* NOTE(review): presumably reserves three scratch utemps (bits 0-2) --
 * confirm the flag polarity against i915_get_utemp() */
p->utemp_flag = ~0x7;
*(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
struct nir_to_i915 c = {
.p = p,
.ifs = ifs,
.opts = *opts,
.ureg_map_size = impl->ssa_alloc,
.ureg_map = CALLOC(impl->ssa_alloc, sizeof(uint32_t)),
};
/* linear walk; bail out at the first recorded error */
nir_foreach_block(block, impl) {
nir_foreach_instr(instr, block) {
emit_instr(&c, instr);
if (p->error[0])
break;
}
if (p->error[0])
break;
}
if (!p->error[0])
fixup_depth_write(&c, s);
/* finalize */
if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
i915_program_error(p, "exceeded max tex indirect (%d/%d)",
p->nr_tex_indirect, I915_MAX_TEX_INDIRECT);
if (p->nr_tex_insn > I915_MAX_TEX_INSN)
i915_program_error(p, "exceeded max tex insn (%d/%d)",
p->nr_tex_insn, I915_MAX_TEX_INSN);
if (p->nr_alu_insn > I915_MAX_ALU_INSN)
i915_program_error(p, "exceeded max ALU insn (%d/%d)",
p->nr_alu_insn, I915_MAX_ALU_INSN);
if (p->nr_decl_insn > I915_MAX_DECL_INSN)
i915_program_error(p, "exceeded max decl insn (%d/%d)",
p->nr_decl_insn, I915_MAX_DECL_INSN);
/* Nothing emitted (e.g. everything folded away): use the canned
 * passthrough program, and count one ALU op so variant comparisons
 * don't treat it as free. */
if (p->nr_alu_insn == 0 && p->nr_tex_insn == 0) {
i915_use_passthrough_shader(ifs);
ifs->nr_alu_insn = 1;
goto cleanup;
}
ifs->nr_alu_insn = p->nr_alu_insn;
ifs->nr_tex_insn = p->nr_tex_insn;
ifs->nr_tex_indirect = p->nr_tex_indirect;
/* NOTE(review): temp_flag starts with the bits above I915_MAX_TEMPORARY
 * pre-set, so this count includes them; harmless for comparing variants
 * only if the TGSI path counts the same way -- verify. */
ifs->nr_temps = util_bitcount(p->temp_flag);
{
unsigned long program_size = (unsigned long)(p->csr - p->program);
unsigned long decl_size = (unsigned long)(p->decl - p->declarations);
/* patch the packet length field (total dwords minus 2-dword header) */
p->declarations[0] |= program_size + decl_size - 2;
assert(!ifs->program);
ifs->program_len = decl_size + program_size;
ifs->program = (uint32_t *)MALLOC(ifs->program_len * sizeof(uint32_t));
memcpy(ifs->program, p->declarations, decl_size * sizeof(uint32_t));
memcpy(&ifs->program[decl_size], p->program,
program_size * sizeof(uint32_t));
if (p->error[0]) {
/* dump the program for debugging, then replace with passthrough */
if (debug && ifs->program_len > 2) {
mesa_logi("FAILED program (%d ALU):", p->nr_alu_insn);
i915_disassemble_program(ifs->program, ifs->program_len);
}
FREE(ifs->program);
ifs->program = NULL;
ifs->program_len = 0;
i915_use_passthrough_shader(ifs);
}
}
cleanup:
/* on failure, ownership of the ralloc'd error string moves to ifs */
if (p->error[0])
ifs->error = p->error;
else
ralloc_free(p->error);
FREE(c.ureg_map);
FREE(p);
if (debug) {
if (ifs->error)
mesa_loge("%s", ifs->error);
mesa_logi("i915 fragment shader with %d constants%s",
ifs->num_constants, ifs->num_constants ? ":" : "");
for (int i = 0; i < I915_MAX_CONSTANT; i++) {
if (ifs->constant_flags[i] & 0x0f) {
mesa_logi("\t\tC[%d] = { %f, %f, %f, %f }", i,
ifs->constants[i][0], ifs->constants[i][1],
ifs->constants[i][2], ifs->constants[i][3]);
}
}
i915_disassemble_program(ifs->program, ifs->program_len);
}
}

View file

@ -176,6 +176,8 @@ i915_optimize_nir(struct nir_shader *s)
{
bool progress;
NIR_PASS(_, s, nir_lower_int_to_float);
do {
progress = false;
@ -212,6 +214,11 @@ i915_optimize_nir(struct nir_shader *s)
} while (progress);
NIR_PASS(_, s, nir_lower_alu_to_scalar, NULL, NULL);
NIR_PASS(_, s, nir_lower_bool_to_float, false);
NIR_PASS(_, s, nir_opt_algebraic);
NIR_PASS(_, s, nir_opt_dce);
NIR_PASS(progress, s, nir_remove_dead_variables, nir_var_function_temp,
NULL);

View file

@ -31,7 +31,9 @@
#include "compiler/nir/nir_builder.h"
#include "draw/draw_context.h"
#include "nir/nir_to_tgsi.h"
#include "tgsi/tgsi_from_mesa.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_math.h"
@ -542,6 +544,23 @@ static const struct nir_to_tgsi_options ntt_options = {
.lower_fabs = true,
};
/* nir_lower_io size callback: measure types in vec4 attribute slots. */
static int
type_size(const struct glsl_type *type, bool bindless)
{
   (void)bindless; /* no bindless resources on i915 */
   return glsl_count_attribute_slots(type, false);
}
static bool
scalarize_vector_bools(const nir_instr *instr, const void *data)
{
if (instr->type != nir_instr_type_alu)
return false;
nir_alu_instr *alu = nir_instr_as_alu(instr);
return alu->op == nir_op_bcsel ||
alu->op == nir_op_fcsel_ge ||
alu->op == nir_op_fcsel_gt;
}
static char *
i915_check_control_flow(nir_shader *s)
{
@ -565,6 +584,94 @@ i915_check_control_flow(nir_shader *s)
return NULL;
}
/* Fragment shader backend selection (I915_FS debug option): compile with
 * the TGSI path, the NIR path, or both and keep the better result. */
enum i915_fs_mode {
I915_FS_TGSI,
I915_FS_NIR,
I915_FS_BOTH,
};
static enum i915_fs_mode
i915_get_fs_mode(void)
{
const char *env = debug_get_option("I915_FS", "both");
if (!strcmp(env, "tgsi"))
return I915_FS_TGSI;
if (!strcmp(env, "nir"))
return I915_FS_NIR;
return I915_FS_BOTH;
}
/*
 * Fill in the driver-facing shader metadata (input semantic table and
 * depth-write flag) from the NIR shader info, taking the role that
 * tgsi_scan_shader plays on the TGSI path.
 */
static void
i915_populate_fs_metadata(struct i915_fragment_shader *ifs, nir_shader *s)
{
ifs->num_inputs = 0;
/* non-zero iff the shader writes gl_FragDepth */
ifs->writes_z = s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH);
nir_foreach_shader_in_variable(var, s) {
unsigned sem_name, sem_index;
tgsi_get_gl_varying_semantic((gl_varying_slot)var->data.location, true,
&sem_name, &sem_index);
unsigned idx = ifs->num_inputs++;
ifs->input_semantic_name[idx] = sem_name;
ifs->input_semantic_index[idx] = sem_index;
}
}
/*
 * Compile via the legacy path: NIR -> TGSI -> i915, filling in the
 * TGSI tokens, scan info, and compiled program on ifs.
 * NOTE(review): assumes nir_to_tgsi_options takes ownership of
 * nir_clone -- confirm the shader isn't freed again by the caller.
 */
static void
i915_compile_tgsi(struct i915_context *i915,
struct i915_fragment_shader *ifs,
struct pipe_screen *screen,
nir_shader *nir_clone)
{
ifs->state.tokens = nir_to_tgsi_options(nir_clone, screen, &ntt_options);
ifs->state.type = PIPE_SHADER_IR_TGSI;
tgsi_scan_shader(ifs->state.tokens, &ifs->info);
i915_translate_fragment_program(i915, ifs);
}
/*
 * Lexicographic cost comparison between two compiled shaders: fewer
 * texture indirections (phases) wins, then fewer ALU instructions, then
 * fewer temps, then fewer constants.  Returns true iff a is strictly
 * cheaper than b.  Keep this ordering in sync with corm_win_reason().
 */
static bool
corm_fs_better(const struct i915_fragment_shader *a,
               const struct i915_fragment_shader *b)
{
   int d = (int)a->nr_tex_indirect - (int)b->nr_tex_indirect;

   if (d == 0)
      d = (int)a->nr_alu_insn - (int)b->nr_alu_insn;
   if (d == 0)
      d = (int)a->nr_temps - (int)b->nr_temps;
   if (d == 0)
      d = (int)a->num_constants - (int)b->num_constants;
   return d < 0;
}
/*
 * Format a short human-readable reason why "winner" beat "loser" into
 * buf: the first cost component that differs (same priority order as
 * corm_fs_better), "identical" when the program bytes match exactly,
 * "tied" otherwise, or "only" when there was no competing shader.
 */
static const char *
corm_win_reason(const struct i915_fragment_shader *winner,
                const struct i915_fragment_shader *loser,
                char *buf, size_t len)
{
   if (!loser) {
      snprintf(buf, len, "only");
      return buf;
   }

   const int d_phase = (int)winner->nr_tex_indirect - (int)loser->nr_tex_indirect;
   const int d_alu = (int)winner->nr_alu_insn - (int)loser->nr_alu_insn;
   const int d_temps = (int)winner->nr_temps - (int)loser->nr_temps;
   const int d_const = (int)winner->num_constants - (int)loser->num_constants;

   if (d_phase)
      snprintf(buf, len, "%+d phase", d_phase);
   else if (d_alu)
      snprintf(buf, len, "%+d alu", d_alu);
   else if (d_temps)
      snprintf(buf, len, "%+d temps", d_temps);
   else if (d_const)
      snprintf(buf, len, "%+d const", d_const);
   else if (winner->program_len == loser->program_len &&
            memcmp(winner->program, loser->program,
                   winner->program_len * sizeof(uint32_t)) == 0)
      snprintf(buf, len, "identical");
   else
      snprintf(buf, len, "tied");
   return buf;
}
static void *
i915_create_fs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
@ -576,39 +683,206 @@ i915_create_fs_state(struct pipe_context *pipe,
ifs->draw_data = draw_create_fragment_shader(i915->draw, templ);
if (templ->type == PIPE_SHADER_IR_NIR) {
nir_shader *s = templ->ir.nir;
ifs->internal = s->info.internal;
char *msg = i915_check_control_flow(s);
if (msg) {
if (I915_DBG_ON(DBG_FS) &&
(!s->info.internal || NIR_DEBUG(PRINT_INTERNAL))) {
mesa_logi("failing shader:");
nir_log_shaderi(s);
}
if (templ->report_compile_error) {
((struct pipe_shader_state *)templ)->error_message = strdup(msg);
ralloc_free(s);
i915_delete_fs_state(NULL, ifs);
return NULL;
}
}
ifs->state.tokens = nir_to_tgsi_options(s, pipe->screen, &ntt_options);
} else {
assert(templ->type == PIPE_SHADER_IR_TGSI);
/* we need to keep a local copy of the tokens */
if (templ->type == PIPE_SHADER_IR_TGSI) {
ifs->state.tokens = tgsi_dup_tokens(templ->tokens);
ifs->state.type = PIPE_SHADER_IR_TGSI;
ifs->internal = i915->no_log_program_errors;
tgsi_scan_shader(ifs->state.tokens, &ifs->info);
i915_translate_fragment_program(i915, ifs);
return ifs;
}
ifs->state.type = PIPE_SHADER_IR_TGSI;
assert(templ->type == PIPE_SHADER_IR_NIR);
nir_shader *s = templ->ir.nir;
ifs->internal = s->info.internal;
tgsi_scan_shader(ifs->state.tokens, &ifs->info);
bool debug = I915_DBG_ON(DBG_FS) &&
(!s->info.internal || NIR_DEBUG(PRINT_INTERNAL));
char *msg = i915_check_control_flow(s);
if (msg) {
if (debug) {
mesa_logi("failing shader:");
nir_log_shaderi(s);
}
if (templ->report_compile_error) {
((struct pipe_shader_state *)templ)->error_message = strdup(msg);
ralloc_free(s);
i915_delete_fs_state(NULL, ifs);
return NULL;
}
}
static enum i915_fs_mode fs_mode = -1;
if (fs_mode == (enum i915_fs_mode)-1)
fs_mode = i915_get_fs_mode();
bool try_nir = (fs_mode == I915_FS_NIR || fs_mode == I915_FS_BOTH);
bool try_tgsi = (fs_mode == I915_FS_TGSI || fs_mode == I915_FS_BOTH);
struct i915_fragment_shader tgsi_fs = {0};
static const struct corm_compile_opts corm_variants[] = {
{ .deferred_const = false, .seq_sne_opt = false },
{ .deferred_const = false, .seq_sne_opt = true },
{ .deferred_const = true, .seq_sne_opt = false },
{ .deferred_const = true, .seq_sne_opt = true },
};
struct i915_fragment_shader nir_results[ARRAY_SIZE(corm_variants)];
int best_nir = -1;
if (try_nir) {
nir_shader *nir_s = try_tgsi ? nir_shader_clone(NULL, s) : s;
NIR_PASS(_, nir_s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
type_size, (nir_lower_io_options)0);
NIR_PASS(_, nir_s, nir_lower_alu_to_scalar, scalarize_vector_bools, NULL);
NIR_PASS(_, nir_s, nir_opt_vectorize, NULL, NULL);
NIR_PASS(_, nir_s, nir_lower_bool_to_float, false);
NIR_PASS(_, nir_s, nir_opt_algebraic);
NIR_PASS(_, nir_s, nir_opt_algebraic_late);
NIR_PASS(_, nir_s, nir_opt_dce);
nir_index_ssa_defs(nir_shader_get_entrypoint(nir_s));
for (unsigned v = 0; v < ARRAY_SIZE(corm_variants); v++) {
nir_shader *variant_nir = (v == ARRAY_SIZE(corm_variants) - 1)
? nir_s : nir_shader_clone(NULL, nir_s);
memset(&nir_results[v], 0, sizeof(nir_results[v]));
i915_populate_fs_metadata(&nir_results[v], variant_nir);
i915_translate_fragment_program_nir(i915, &nir_results[v],
variant_nir, &corm_variants[v]);
if (v < ARRAY_SIZE(corm_variants) - 1)
ralloc_free(variant_nir);
bool ok = !nir_results[v].error || !nir_results[v].error[0];
if (ok && (best_nir < 0 ||
corm_fs_better(&nir_results[v], &nir_results[best_nir])))
best_nir = v;
}
if (try_tgsi)
ralloc_free(nir_s);
}
if (try_tgsi) {
i915_compile_tgsi(i915, &tgsi_fs, pipe->screen, s);
} else {
ralloc_free(s);
}
bool nir_ok = best_nir >= 0;
bool tgsi_ok = try_tgsi && (!tgsi_fs.error || !tgsi_fs.error[0]);
struct i915_fragment_shader *best_nir_fs = nir_ok ? &nir_results[best_nir] : NULL;
bool use_nir;
if (nir_ok && tgsi_ok)
use_nir = !corm_fs_better(&tgsi_fs, best_nir_fs);
else
use_nir = nir_ok;
if (debug && try_nir && try_tgsi) {
for (unsigned v = 0; v < ARRAY_SIZE(corm_variants); v++) {
bool ok = !nir_results[v].error || !nir_results[v].error[0];
mesa_logi(" NIR[dc=%d,ss=%d]: %s (%d ALU, %d phase, %d temps)%s",
corm_variants[v].deferred_const,
corm_variants[v].seq_sne_opt,
ok ? "ok" : "FAIL",
ok ? nir_results[v].nr_alu_insn : 0,
ok ? nir_results[v].nr_tex_indirect : 0,
ok ? nir_results[v].nr_temps : 0,
(int)v == best_nir ? " *" : "");
}
mesa_logi(" TGSI: %s (%d ALU, %d phase, %d temps)",
tgsi_ok ? "ok" : "FAIL",
tgsi_ok ? tgsi_fs.nr_alu_insn : 0,
tgsi_ok ? tgsi_fs.nr_tex_indirect : 0,
tgsi_ok ? tgsi_fs.nr_temps : 0);
mesa_logi(" -> %s%s", use_nir ? "NIR" : "TGSI",
use_nir ? (corm_fs_better(best_nir_fs, &tgsi_fs)
? " (better)" : " (tied)") : "");
}
/* Free non-winning NIR variants */
if (try_nir) {
for (unsigned v = 0; v < ARRAY_SIZE(corm_variants); v++) {
if ((int)v != best_nir) {
FREE(nir_results[v].program);
ralloc_free(nir_results[v].error);
}
}
}
struct i915_fragment_shader *winner, *loser = NULL;
struct i915_fragment_shader nir_loser_copy = {0};
if (use_nir) {
winner = best_nir_fs;
loser = tgsi_ok ? &tgsi_fs : NULL;
} else {
winner = &tgsi_fs;
if (best_nir_fs) {
nir_loser_copy = *best_nir_fs;
nir_loser_copy.program = NULL;
loser = &nir_loser_copy;
FREE(best_nir_fs->program);
ralloc_free(best_nir_fs->error);
}
}
if (i915 && !ifs->internal) {
bool neither = (winner->nr_alu_insn + winner->nr_tex_insn) == 0;
char reason[32];
if (neither)
snprintf(reason, sizeof(reason), "neither");
else
corm_win_reason(winner, loser, reason, sizeof(reason));
util_debug_message(
&i915->debug, SHADER_INFO,
"%s shader [%s, %s]: %d instructions, %d alu, %d tex, "
"%d tex_indirect, %d temps, %d const",
_mesa_shader_stage_to_abbrev(MESA_SHADER_FRAGMENT),
neither ? "FAIL" : use_nir ? "NIR" : "TGSI", reason,
winner->nr_alu_insn + winner->nr_tex_insn,
winner->nr_alu_insn, winner->nr_tex_insn, winner->nr_tex_indirect,
winner->nr_temps, winner->num_constants);
}
ifs->program = winner->program;
ifs->program_len = winner->program_len;
ifs->nr_alu_insn = winner->nr_alu_insn;
ifs->nr_tex_insn = winner->nr_tex_insn;
ifs->nr_tex_indirect = winner->nr_tex_indirect;
ifs->nr_temps = winner->nr_temps;
ifs->num_constants = winner->num_constants;
memcpy(ifs->constants, winner->constants, sizeof(ifs->constants));
memcpy(ifs->constant_flags, winner->constant_flags,
sizeof(ifs->constant_flags));
memcpy(ifs->texcoords, winner->texcoords, sizeof(ifs->texcoords));
ifs->reads_pntc = winner->reads_pntc;
ifs->writes_z = winner->writes_z;
ifs->num_inputs = winner->num_inputs;
memcpy(ifs->input_semantic_name, winner->input_semantic_name,
sizeof(ifs->input_semantic_name));
memcpy(ifs->input_semantic_index, winner->input_semantic_index,
sizeof(ifs->input_semantic_index));
if (winner->error)
ifs->error = winner->error;
/* The loser's info may be in use (TGSI path populates ifs->info) */
if (try_tgsi)
ifs->info = tgsi_fs.info;
if (loser) {
FREE(loser->program);
ralloc_free(loser->error);
}
if (!use_nir && try_tgsi) {
/* TGSI won — tokens are in tgsi_fs via i915_compile_tgsi.
* We need them for ifs->state for draw's FS pipeline. */
ifs->state = tgsi_fs.state;
} else if (try_tgsi) {
FREE((void *)tgsi_fs.state.tokens);
}
/* The shader's compiled to i915 instructions here */
i915_translate_fragment_program(i915, ifs);
if (ifs->error && templ->report_compile_error) {
((struct pipe_shader_state *)templ)->error_message = strdup(ifs->error);
i915_delete_fs_state(NULL, ifs);

View file

@ -16,6 +16,7 @@ files_i915 = files(
'i915_flush.c',
'i915_fpc_emit.c',
'i915_fpc.h',
'i915_fpc_nir.c',
'i915_fpc_optimize.c',
'i915_fpc_translate.c',
'i915_prim_emit.c',