Merge branch 'i915-nir-backend' into 'main'

Draft: i915: s/tgsi/nir/

See merge request mesa/mesa!41371
This commit is contained in:
Adam Jackson 2026-05-07 20:10:03 -04:00
commit 52a3df7f76
10 changed files with 1853 additions and 169 deletions

View file

@ -88,8 +88,15 @@ struct i915_winsys_batchbuffer;
#define I915_MAX_CONSTANT 32
/** See constant_flags[] below */
#define I915_CONSTFLAG_USER 0x1f
/**
* Per-channel flags for constant_flags[].
* Bits 0-3: channel has a compiler immediate.
* Bits 4-7: channel has a user (UBO) value uploaded at draw time.
* A channel is available when neither bit is set.
*/
#define I915_CONSTFLAG_IMM(ch) (1 << (ch))
#define I915_CONSTFLAG_USER_CH(ch) (1 << ((ch) + 4))
#define I915_CONSTFLAG_USER 0xf0
/**
* Subclass of pipe_shader_state
@ -103,6 +110,10 @@ struct i915_fragment_shader {
uint32_t *program;
uint32_t program_len;
uint32_t nr_alu_insn;
uint32_t nr_tex_insn;
uint32_t nr_tex_indirect;
uint32_t nr_temps;
/**
* constants introduced during translation.
@ -134,12 +145,15 @@ struct i915_fragment_shader {
} texcoords[I915_TEX_UNITS];
bool reads_pntc;
bool writes_z;
unsigned num_inputs;
uint8_t input_semantic_name[PIPE_MAX_SHADER_INPUTS];
uint8_t input_semantic_index[PIPE_MAX_SHADER_INPUTS];
/* Set if the shader is an internal (blit, etc.) shader that shouldn't debug
* log by default. */
bool internal;
char *error; /* Any error message from compiling this shader (or NULL) */
char *error;
};
struct i915_cache_context;

View file

@ -136,6 +136,15 @@ swizzle(int reg, uint32_t x, uint32_t y, uint32_t z, uint32_t w)
CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3));
}
/**
 * Component-wise negation of a ureg.
 *
 * Each of x/y/z/w says whether the negate bit of the matching channel is
 * toggled in reg.  Only bit 0 of each flag is honoured, so a caller that
 * passes a value other than 0 or 1 cannot flip unrelated ureg bits.
 *
 * Returns reg with the selected negate bits XOR-ed in.
 */
static inline int
negate(int reg, int x, int y, int z, int w)
{
   return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
                 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
                 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
                 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
}
#define A0_DEST(reg) (((reg)&UREG_TYPE_NR_MASK) >> UREG_A0_DEST_SHIFT_LEFT)
#define D0_DEST(reg) (((reg)&UREG_TYPE_NR_MASK) >> UREG_A0_DEST_SHIFT_LEFT)
#define T0_DEST(reg) (((reg)&UREG_TYPE_NR_MASK) >> UREG_A0_DEST_SHIFT_LEFT)
@ -173,8 +182,21 @@ swizzle(int reg, uint32_t x, uint32_t y, uint32_t z, uint32_t w)
*/
extern void i915_translate_fragment_program(struct i915_context *i915,
struct i915_fragment_shader *fs);
/**
 * Options selecting one variant of the NIR fragment-program translation.
 *
 * A pointer to this struct is passed to
 * i915_translate_fragment_program_nir().
 *
 * NOTE(review): the exact effect of each flag is implemented in the NIR
 * translator, which is not visible here -- confirm before relying on the
 * field names alone.
 */
struct corm_compile_opts {
bool deferred_const;
bool seq_sne_opt;
bool late_scalar;
};
extern void i915_translate_fragment_program_nir(struct i915_context *i915,
struct i915_fragment_shader *ifs,
struct nir_shader *s,
const struct corm_compile_opts *opts);
extern void i915_use_passthrough_shader(struct i915_fragment_shader *fs);
extern void i915_program_error(struct i915_fp_compile *p, const char *msg, ...);
extern uint32_t i915_get_temp(struct i915_fp_compile *p);
extern void i915_release_temp(struct i915_fp_compile *p, int reg);
extern uint32_t i915_get_utemp(struct i915_fp_compile *p);
extern void i915_release_utemps(struct i915_fp_compile *p);
@ -191,6 +213,8 @@ extern uint32_t i915_emit_decl(struct i915_fp_compile *p, uint32_t type,
uint32_t nr, uint32_t d0_flags);
extern uint32_t i915_emit_const1f(struct i915_fp_compile *p, float c0);
extern uint32_t i915_emit_const1f_prefer(struct i915_fp_compile *p, float c0,
int preferred_reg);
extern uint32_t i915_emit_const2f(struct i915_fp_compile *p, float c0,
float c1);

View file

@ -25,11 +25,45 @@
*
**************************************************************************/
#include <stdarg.h>
#include "util/ralloc.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "i915_context.h"
#include "i915_fpc.h"
#include "i915_reg.h"
void
i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
{
va_list args;
va_start(args, msg);
ralloc_vasprintf_append(&p->error, msg, args);
va_end(args);
}
/**
 * Simple pass-through fragment program, copied into a shader by
 * i915_use_passthrough_shader().
 *
 * It consists of the pixel-shader-program packet header followed by a single
 * three-dword arithmetic instruction.
 */
static const unsigned passthrough_program[] = {
/* packet header: one instruction of three dwords follows */
_3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1),
/* move to output color: MOV oC.xyzw, R0 (source swizzled below) */
(A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL |
(REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)),
/* source channel selects: x = ONE, y = ZERO, z = ZERO, w = ONE */
((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) |
(SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) |
(SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) |
(SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)),
/* third instruction dword is zero */
0};
/**
 * Install the pass-through program into fs.
 *
 * Used when there is no real shader or translation failed.  A fresh copy of
 * passthrough_program is allocated for fs->program; fs->num_constants is
 * reset to 0.
 *
 * If the allocation fails, fs->program is NULL and fs->program_len is set to
 * 0 so the two fields never disagree.
 */
void
i915_use_passthrough_shader(struct i915_fragment_shader *fs)
{
   fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program));
   if (fs->program) {
      memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
      fs->program_len = ARRAY_SIZE(passthrough_program);
   } else {
      /* Do not leave a stale length paired with a NULL program. */
      fs->program_len = 0;
   }
   fs->num_constants = 0;
}
uint32_t
i915_get_temp(struct i915_fp_compile *p)
{
@ -43,7 +77,7 @@ i915_get_temp(struct i915_fp_compile *p)
return bit - 1;
}
static void
void
i915_release_temp(struct i915_fp_compile *p, int reg)
{
p->temp_flag &= ~(1 << reg);
@ -179,8 +213,6 @@ i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask,
{
const uint32_t k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord));
int temp = -1;
uint32_t coord_used = 0xf << UREG_CHANNEL_X_SHIFT;
if (coord_mask & TGSI_WRITEMASK_Y)
coord_used |= 0xf << UREG_CHANNEL_Y_SHIFT;
@ -191,13 +223,10 @@ i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask,
if ((coord & coord_used) != (k & coord_used) ||
GET_UREG_TYPE(coord) == REG_TYPE_CONST) {
/* texcoord is swizzled or negated. Need to allocate a new temporary
* register (a utemp / unpreserved temp) won't do.
/* texcoord is swizzled or negated. Need a temporary to hold it.
* Use a utemp so it doesn't create a tex indirect phase boundary.
*/
uint32_t tempReg;
temp = i915_get_temp(p); /* get temp reg index */
tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */
uint32_t tempReg = i915_get_utemp(p);
i915_emit_arith(p, A0_MOV, tempReg,
A0_DEST_CHANNEL_ALL, /* dest reg, writemask */
@ -227,11 +256,21 @@ i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask,
p->nr_tex_indirect++;
/* Reading from an r# register whose contents depend on output of the
* current phase defines a phase boundary.
* current phase defines a phase boundary. Prefer just bumping the
* phase count (free), but if we'd exceed the HW limit, copy to a
* utemp instead (costs 1 ALU instruction).
*/
if (GET_UREG_TYPE(coord) == REG_TYPE_R &&
p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect)
p->nr_tex_indirect++;
p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect) {
if (p->nr_tex_indirect + 1 < I915_MAX_TEX_INDIRECT) {
p->nr_tex_indirect++;
} else {
uint32_t tmp = i915_get_utemp(p);
i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
coord, 0, 0);
coord = tmp;
}
}
if (p->csr < p->program + I915_PROGRAM_SIZE) {
*(p->csr++) = (opcode | T0_DEST(dest) | T0_SAMPLER(sampler));
@ -246,40 +285,75 @@ i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask,
p->nr_tex_insn++;
}
if (temp >= 0)
i915_release_temp(p, temp);
return dest;
}
/**
 * Try to place the scalar immediate c0 in one channel of constant register
 * reg.
 *
 * Channels flagged as user values are never touched.  A channel is taken when
 * it holds no immediate yet, or when it already holds an immediate equal to
 * c0 (in which case it is shared).
 *
 * Returns a CONST ureg swizzled as (channel, ZERO, ZERO, ONE) on success, or
 * UREG_BAD when no channel of reg can hold the value.
 */
static uint32_t
i915_try_const1f_in_reg(struct i915_fp_compile *p, float c0, unsigned reg)
{
   struct i915_fragment_shader *fs = p->shader;

   for (unsigned ch = 0; ch < 4; ch++) {
      bool user_owned = fs->constant_flags[reg] & I915_CONSTFLAG_USER_CH(ch);
      bool has_imm = fs->constant_flags[reg] & I915_CONSTFLAG_IMM(ch);

      if (user_owned)
         continue;

      /* Occupied by a different immediate: try the next channel. */
      if (has_imm && fs->constants[reg][ch] != c0)
         continue;

      fs->constants[reg][ch] = c0;
      fs->constant_flags[reg] |= I915_CONSTFLAG_IMM(ch);
      if (fs->num_constants < reg + 1)
         fs->num_constants = reg + 1;

      return swizzle(UREG(REG_TYPE_CONST, reg), ch, ZERO, ZERO, ONE);
   }

   return UREG_BAD;
}
/**
 * Allocate (or share) a constant channel holding the scalar c0.
 *
 * preferred_reg, when it names a valid constant register, is tried first;
 * a negative or out-of-range value means "no preference".  After that every
 * register from 0 to I915_MAX_CONSTANT - 1 is tried in order.
 *
 * Returns the swizzled CONST ureg on success.  When no channel is available
 * an error is recorded with i915_program_error() and 0 is returned.
 */
static uint32_t
i915_try_emit_const1f(struct i915_fp_compile *p, float c0, int preferred_reg)
{
   /* The upper bound guards the constant_flags[] / constants[] indexing in
    * i915_try_const1f_in_reg() against a bogus preference.
    */
   if (preferred_reg >= 0 && preferred_reg < I915_MAX_CONSTANT) {
      uint32_t r = i915_try_const1f_in_reg(p, c0, preferred_reg);
      if (r != UREG_BAD)
         return r;
   }

   for (unsigned reg = 0; reg < I915_MAX_CONSTANT; reg++) {
      uint32_t r = i915_try_const1f_in_reg(p, c0, reg);
      if (r != UREG_BAD)
         return r;
   }

   i915_program_error(p, "i915_emit_const1f: out of constants");
   return 0;
}
uint32_t
i915_emit_const1f(struct i915_fp_compile *p, float c0)
{
struct i915_fragment_shader *ifs = p->shader;
unsigned reg, idx;
if (c0 == 0.0)
return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
if (c0 == 1.0)
return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);
if (c0 == -1.0)
return negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE),
1, 1, 1, 1);
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
continue;
for (idx = 0; idx < 4; idx++) {
if (!(ifs->constant_flags[reg] & (1 << idx)) ||
ifs->constants[reg][idx] == c0) {
ifs->constants[reg][idx] = c0;
ifs->constant_flags[reg] |= 1 << idx;
if (reg + 1 > ifs->num_constants)
ifs->num_constants = reg + 1;
return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);
}
}
}
return i915_try_emit_const1f(p, c0, -1);
}
i915_program_error(p, "i915_emit_const1f: out of constants");
return 0;
/**
 * Emit a scalar constant, preferring a channel of preferred_reg.
 *
 * The values 0, 1 and -1 need no constant slot: they are expressed as a
 * ZERO / ONE swizzle of R0 (negated for -1).  Every other value is handed to
 * i915_try_emit_const1f() together with the preferred register.
 */
uint32_t
i915_emit_const1f_prefer(struct i915_fp_compile *p, float c0,
                         int preferred_reg)
{
   if (c0 == 0.0)
      return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);

   if (c0 == 1.0 || c0 == -1.0) {
      if (c0 == 1.0)
         return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);

      return negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE),
                    1, 1, 1, 1);
   }

   return i915_try_emit_const1f(p, c0, preferred_reg);
}
uint32_t
@ -301,14 +375,15 @@ i915_emit_const2f(struct i915_fp_compile *p, float c0, float c1)
// XXX emit swizzle here for 0, 1, -1 and any combination thereof
// we can use swizzle + neg for that
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
if (ifs->constant_flags[reg] == 0xf ||
ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
uint8_t occupied = (ifs->constant_flags[reg] & 0xf) |
(ifs->constant_flags[reg] >> 4);
if (occupied == 0xf)
continue;
for (idx = 0; idx < 3; idx++) {
if (!(ifs->constant_flags[reg] & (3 << idx))) {
if (!(occupied & (3 << idx))) {
ifs->constants[reg][idx + 0] = c0;
ifs->constants[reg][idx + 1] = c1;
ifs->constant_flags[reg] |= 3 << idx;
ifs->constant_flags[reg] |= (3 << idx); /* immediate bits */
if (reg + 1 > ifs->num_constants)
ifs->num_constants = reg + 1;
return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE);
@ -330,9 +405,9 @@ i915_emit_const4f(struct i915_fp_compile *p, float c0, float c1, float c2,
// XXX emit swizzle here for 0, 1, -1 and any combination thereof
// we can use swizzle + neg for that
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
if (ifs->constant_flags[reg] == 0xf && ifs->constants[reg][0] == c0 &&
ifs->constants[reg][1] == c1 && ifs->constants[reg][2] == c2 &&
ifs->constants[reg][3] == c3) {
if ((ifs->constant_flags[reg] & 0x0f) == 0x0f &&
ifs->constants[reg][0] == c0 && ifs->constants[reg][1] == c1 &&
ifs->constants[reg][2] == c2 && ifs->constants[reg][3] == c3) {
return UREG(REG_TYPE_CONST, reg);
} else if (ifs->constant_flags[reg] == 0) {
@ -340,7 +415,7 @@ i915_emit_const4f(struct i915_fp_compile *p, float c0, float c1, float c2,
ifs->constants[reg][1] = c1;
ifs->constants[reg][2] = c2;
ifs->constants[reg][3] = c3;
ifs->constant_flags[reg] = 0xf;
ifs->constant_flags[reg] = 0x0f;
if (reg + 1 > ifs->num_constants)
ifs->num_constants = reg + 1;
return UREG(REG_TYPE_CONST, reg);

File diff suppressed because it is too large Load diff

View file

@ -405,6 +405,8 @@ i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
target_is_texture2d(next->FullInstruction.Texture.Texture) &&
same_src_dst_reg(&next->FullInstruction.Src[0],
&current->FullInstruction.Dst[0]) &&
(current->FullInstruction.Dst[0].Register.WriteMask &
i915_tex_mask(next)) == i915_tex_mask(next) &&
is_unswizzled(&current->FullInstruction.Src[0], i915_tex_mask(next)) &&
unused_from(ctx, &current->FullInstruction.Dst[0], index)) {
memcpy(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0],

View file

@ -54,55 +54,9 @@
* Simple pass-through fragment shader to use when we don't have
* a real shader (or it fails to compile for some reason).
*/
static unsigned passthrough_program[] = {
_3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1),
/* move to output color:
*/
(A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL |
(REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)),
((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) |
(SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) |
(SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) |
(SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)),
0};
/**
* component-wise negation of ureg
*/
static inline int
negate(int reg, int x, int y, int z, int w)
{
/* Another neat thing about the UREG representation */
return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
}
/**
* In the event of a translation failure, we'll generate a simple color
* pass-through program.
*/
static void
i915_use_passthrough_shader(struct i915_fragment_shader *fs)
{
fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program));
if (fs->program) {
memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
fs->program_len = ARRAY_SIZE(passthrough_program);
}
fs->num_constants = 0;
}
void
i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
{
va_list args;
va_start(args, msg);
ralloc_vasprintf_append(&p->error, msg, args);
va_end(args);
}
static uint32_t
get_mapping(struct i915_fragment_shader *fs, enum tgsi_semantic semantic,
int index)
@ -1006,12 +960,11 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
p->nr_decl_insn, I915_MAX_DECL_INSN);
}
/* hw doesn't seem to like empty frag programs (num_instructions == 1 is just
* TGSI_END), even when the depth write fixup gets emitted below - maybe that
* one is fishy, too?
*/
if (ifs->info.num_instructions == 1)
i915_program_error(p, "Empty fragment shader");
if (ifs->info.num_instructions == 1) {
i915_use_passthrough_shader(ifs);
ifs->nr_alu_insn = 1;
goto done;
}
if (strlen(p->error) != 0) {
i915_use_passthrough_shader(ifs);
@ -1024,6 +977,10 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
assert(!ifs->program);
ifs->program_len = decl_size + program_size;
ifs->nr_alu_insn = p->nr_alu_insn;
ifs->nr_tex_insn = p->nr_tex_insn;
ifs->nr_tex_indirect = p->nr_tex_indirect;
ifs->nr_temps = util_bitcount(p->temp_flag);
ifs->program = (uint32_t *)MALLOC(ifs->program_len * sizeof(uint32_t));
memcpy(ifs->program, p->declarations, decl_size * sizeof(uint32_t));
memcpy(&ifs->program[decl_size], p->program,
@ -1032,14 +989,16 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
if (i915) {
util_debug_message(
&i915->debug, SHADER_INFO,
"%s shader: %d inst, %d tex, %d tex_indirect, %d temps, %d const",
"%s shader: %d instructions, %d alu, %d tex, %d tex_indirect, "
"%d temps, %d const",
_mesa_shader_stage_to_abbrev(MESA_SHADER_FRAGMENT),
(int)program_size, p->nr_tex_insn, p->nr_tex_indirect,
p->shader->info.file_max[TGSI_FILE_TEMPORARY] + 1,
ifs->num_constants);
ifs->nr_alu_insn + ifs->nr_tex_insn,
ifs->nr_alu_insn, ifs->nr_tex_insn, ifs->nr_tex_indirect,
ifs->nr_temps, ifs->num_constants);
}
}
done:
if (strlen(p->error) != 0)
ifs->error = p->error;
else

View file

@ -176,6 +176,8 @@ i915_optimize_nir(struct nir_shader *s)
{
bool progress;
NIR_PASS(_, s, nir_lower_int_to_float);
do {
progress = false;
@ -212,6 +214,11 @@ i915_optimize_nir(struct nir_shader *s)
} while (progress);
NIR_PASS(_, s, nir_lower_alu_to_scalar, NULL, NULL);
NIR_PASS(_, s, nir_lower_bool_to_float, false);
NIR_PASS(_, s, nir_opt_algebraic);
NIR_PASS(_, s, nir_opt_dce);
NIR_PASS(progress, s, nir_remove_dead_variables, nir_var_function_temp,
NULL);

View file

@ -31,7 +31,9 @@
#include "compiler/nir/nir_builder.h"
#include "draw/draw_context.h"
#include "nir/nir_to_tgsi.h"
#include "tgsi/tgsi_from_mesa.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_math.h"
@ -542,6 +544,37 @@ static const struct nir_to_tgsi_options ntt_options = {
.lower_fabs = true,
};
/**
 * Size callback handed to nir_lower_io: the number of attribute slots
 * reported by glsl_count_attribute_slots() for the type.  The bindless
 * argument is not used.
 */
static int
type_size(const struct glsl_type *type, bool bindless)
{
   (void)bindless;

   const int slots = glsl_count_attribute_slots(type, false);
   return slots;
}
static bool
scalarize_vector_bools(const nir_instr *instr, const void *data)
{
if (instr->type != nir_instr_type_alu)
return false;
nir_alu_instr *alu = nir_instr_as_alu(instr);
return alu->op == nir_op_bcsel ||
alu->op == nir_op_fcsel_ge ||
alu->op == nir_op_fcsel_gt;
}
/**
 * Instruction filter for the fsqrt lowering: matches only ALU instructions
 * whose opcode is nir_op_fsqrt.
 */
static bool
lower_fsqrt_filter(const nir_instr *instr, UNUSED const void *data)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   const nir_alu_instr *alu = nir_instr_as_alu(instr);
   return alu->op == nir_op_fsqrt;
}
/**
 * Lowering callback for fsqrt: sqrt(x) is rewritten as rcp(rsq(x)).
 *
 * The source is fetched with nir_ssa_for_alu_src() so that the ALU source
 * swizzle and the instruction's component count are honoured; reading
 * src[0].src.ssa directly would pick up the wrong channels of a swizzled
 * operand.
 *
 * rcp(rsq(x)) is used rather than x * rsq(x) because the latter evaluates
 * 0 * inf for x == 0.
 *
 * Returns the replacement value for the fsqrt result.
 */
static nir_def *
lower_fsqrt_impl(nir_builder *b, nir_instr *instr, UNUSED void *data)
{
   nir_alu_instr *alu = nir_instr_as_alu(instr);
   nir_def *src = nir_ssa_for_alu_src(b, alu, 0);

   return nir_frcp(b, nir_frsq(b, src));
}
static char *
i915_check_control_flow(nir_shader *s)
{
@ -565,6 +598,94 @@ i915_check_control_flow(nir_shader *s)
return NULL;
}
enum i915_fs_mode {
I915_FS_TGSI,
I915_FS_NIR,
I915_FS_BOTH,
};
static enum i915_fs_mode
i915_get_fs_mode(void)
{
const char *env = debug_get_option("I915_FS", "both");
if (!strcmp(env, "tgsi"))
return I915_FS_TGSI;
if (!strcmp(env, "nir"))
return I915_FS_NIR;
return I915_FS_BOTH;
}
/**
 * Fill in the shader metadata derived directly from the NIR shader.
 *
 * Sets ifs->writes_z from the FRAG_RESULT_DEPTH bit of outputs_written, and
 * records the TGSI semantic name/index of every shader input variable in
 * ifs->input_semantic_name[] / ifs->input_semantic_index[], counting them in
 * ifs->num_inputs.
 *
 * Inputs beyond the capacity of the semantic arrays are not recorded, so the
 * fixed-size arrays can never be overrun.
 */
static void
i915_populate_fs_metadata(struct i915_fragment_shader *ifs, nir_shader *s)
{
   ifs->num_inputs = 0;
   ifs->writes_z =
      (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0;

   nir_foreach_shader_in_variable(var, s) {
      /* Stop before writing past the end of the semantic arrays. */
      if (ifs->num_inputs >= ARRAY_SIZE(ifs->input_semantic_name))
         break;

      unsigned sem_name, sem_index;
      tgsi_get_gl_varying_semantic((gl_varying_slot)var->data.location, true,
                                   &sem_name, &sem_index);

      unsigned idx = ifs->num_inputs++;
      ifs->input_semantic_name[idx] = sem_name;
      ifs->input_semantic_index[idx] = sem_index;
   }
}
/**
 * Compile a fragment shader through the TGSI path.
 *
 * Converts nir_clone to TGSI tokens with nir_to_tgsi_options(), stores them
 * in ifs->state, scans them into ifs->info and then runs the TGSI translator
 * i915_translate_fragment_program() on ifs.
 *
 * NOTE(review): nir_to_tgsi_options() presumably takes ownership of the NIR
 * shader passed in -- confirm against its documentation.
 */
static void
i915_compile_tgsi(struct i915_context *i915,
struct i915_fragment_shader *ifs,
struct pipe_screen *screen,
nir_shader *nir_clone)
{
ifs->state.tokens = nir_to_tgsi_options(nir_clone, screen, &ntt_options);
ifs->state.type = PIPE_SHADER_IR_TGSI;
/* the scan must run on the tokens before translation */
tgsi_scan_shader(ifs->state.tokens, &ifs->info);
i915_translate_fragment_program(i915, ifs);
}
/**
 * Strict "a is better than b" ordering between two compiled shaders.
 *
 * Compared in priority order, lower wins: texture indirection count, ALU
 * instruction count, temporaries, constants.  Returns false on a full tie.
 */
static bool
corm_fs_better(const struct i915_fragment_shader *a,
               const struct i915_fragment_shader *b)
{
   if (a->nr_tex_indirect < b->nr_tex_indirect)
      return true;
   if (a->nr_tex_indirect > b->nr_tex_indirect)
      return false;

   if (a->nr_alu_insn < b->nr_alu_insn)
      return true;
   if (a->nr_alu_insn > b->nr_alu_insn)
      return false;

   if (a->nr_temps < b->nr_temps)
      return true;
   if (a->nr_temps > b->nr_temps)
      return false;

   return a->num_constants < b->num_constants;
}
/**
 * Describe in a few characters why winner was chosen over loser.
 *
 * The text is written to buf (at most len bytes, always via snprintf) and buf
 * is returned.  With no loser the reason is "only".  Otherwise the first
 * differing metric is reported as a signed delta (winner minus loser) in the
 * order phases, ALU, temps, constants.  When all metrics match the result is
 * "identical" if both programs are present and byte-for-byte equal, else
 * "tied".
 *
 * Either program pointer may be NULL (for example a loser whose program has
 * already been released); such shaders are never reported as identical.
 */
static const char *
corm_win_reason(const struct i915_fragment_shader *winner,
                const struct i915_fragment_shader *loser,
                char *buf, size_t len)
{
   if (!loser) {
      snprintf(buf, len, "only");
      return buf;
   }

   int da = (int)winner->nr_alu_insn - (int)loser->nr_alu_insn;
   int dp = (int)winner->nr_tex_indirect - (int)loser->nr_tex_indirect;
   int dt = (int)winner->nr_temps - (int)loser->nr_temps;
   int dc = (int)winner->num_constants - (int)loser->num_constants;

   if (dp != 0) {
      snprintf(buf, len, "%+d phase", dp);
   } else if (da != 0) {
      snprintf(buf, len, "%+d alu", da);
   } else if (dt != 0) {
      snprintf(buf, len, "%+d temps", dt);
   } else if (dc != 0) {
      snprintf(buf, len, "%+d const", dc);
   } else if (winner->program && loser->program &&
              winner->program_len == loser->program_len &&
              !memcmp(winner->program, loser->program,
                      winner->program_len * sizeof(uint32_t))) {
      /* Only compare the code when both programs actually exist. */
      snprintf(buf, len, "identical");
   } else {
      snprintf(buf, len, "tied");
   }

   return buf;
}
static void *
i915_create_fs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
@ -576,39 +697,222 @@ i915_create_fs_state(struct pipe_context *pipe,
ifs->draw_data = draw_create_fragment_shader(i915->draw, templ);
if (templ->type == PIPE_SHADER_IR_NIR) {
nir_shader *s = templ->ir.nir;
ifs->internal = s->info.internal;
char *msg = i915_check_control_flow(s);
if (msg) {
if (I915_DBG_ON(DBG_FS) &&
(!s->info.internal || NIR_DEBUG(PRINT_INTERNAL))) {
mesa_logi("failing shader:");
nir_log_shaderi(s);
}
if (templ->report_compile_error) {
((struct pipe_shader_state *)templ)->error_message = strdup(msg);
ralloc_free(s);
i915_delete_fs_state(NULL, ifs);
return NULL;
}
}
ifs->state.tokens = nir_to_tgsi_options(s, pipe->screen, &ntt_options);
} else {
assert(templ->type == PIPE_SHADER_IR_TGSI);
/* we need to keep a local copy of the tokens */
if (templ->type == PIPE_SHADER_IR_TGSI) {
ifs->state.tokens = tgsi_dup_tokens(templ->tokens);
ifs->state.type = PIPE_SHADER_IR_TGSI;
ifs->internal = i915->no_log_program_errors;
tgsi_scan_shader(ifs->state.tokens, &ifs->info);
i915_translate_fragment_program(i915, ifs);
return ifs;
}
ifs->state.type = PIPE_SHADER_IR_TGSI;
assert(templ->type == PIPE_SHADER_IR_NIR);
nir_shader *s = templ->ir.nir;
ifs->internal = s->info.internal;
tgsi_scan_shader(ifs->state.tokens, &ifs->info);
bool debug = I915_DBG_ON(DBG_FS) &&
(!s->info.internal || NIR_DEBUG(PRINT_INTERNAL));
char *msg = i915_check_control_flow(s);
if (msg) {
if (debug) {
mesa_logi("failing shader:");
nir_log_shaderi(s);
}
if (templ->report_compile_error) {
((struct pipe_shader_state *)templ)->error_message = strdup(msg);
ralloc_free(s);
i915_delete_fs_state(NULL, ifs);
return NULL;
}
}
static enum i915_fs_mode fs_mode = -1;
if (fs_mode == (enum i915_fs_mode)-1)
fs_mode = i915_get_fs_mode();
bool try_nir = (fs_mode == I915_FS_NIR || fs_mode == I915_FS_BOTH);
bool try_tgsi = (fs_mode == I915_FS_TGSI || fs_mode == I915_FS_BOTH);
struct i915_fragment_shader tgsi_fs = {0};
static const struct corm_compile_opts corm_variants[] = {
{ .deferred_const = false, .seq_sne_opt = false },
{ .deferred_const = false, .seq_sne_opt = true },
{ .deferred_const = true, .seq_sne_opt = false },
{ .deferred_const = true, .seq_sne_opt = true },
{ .deferred_const = false, .seq_sne_opt = false, .late_scalar = true },
{ .deferred_const = false, .seq_sne_opt = true, .late_scalar = true },
{ .deferred_const = true, .seq_sne_opt = false, .late_scalar = true },
{ .deferred_const = true, .seq_sne_opt = true, .late_scalar = true },
};
struct i915_fragment_shader nir_results[ARRAY_SIZE(corm_variants)];
int best_nir = -1;
if (try_nir) {
nir_shader *nir_s = try_tgsi ? nir_shader_clone(NULL, s) : s;
NIR_PASS(_, nir_s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
type_size, (nir_lower_io_options)0);
NIR_PASS(_, nir_s, nir_lower_alu_to_scalar, scalarize_vector_bools, NULL);
NIR_PASS(_, nir_s, nir_opt_vectorize, NULL, NULL);
NIR_PASS(_, nir_s, nir_lower_bool_to_float, false);
NIR_PASS(_, nir_s, nir_shader_lower_instructions, lower_fsqrt_filter,
lower_fsqrt_impl, NULL);
NIR_PASS(_, nir_s, nir_opt_copy_prop);
NIR_PASS(_, nir_s, nir_opt_cse);
NIR_PASS(_, nir_s, nir_opt_dce);
NIR_PASS(_, nir_s, nir_opt_algebraic);
NIR_PASS(_, nir_s, nir_opt_algebraic_late);
NIR_PASS(_, nir_s, nir_opt_dce);
NIR_PASS(_, nir_s, nir_opt_shrink_vectors, false);
NIR_PASS(_, nir_s, nir_opt_copy_prop);
NIR_PASS(_, nir_s, nir_opt_dce);
nir_index_ssa_defs(nir_shader_get_entrypoint(nir_s));
for (unsigned v = 0; v < ARRAY_SIZE(corm_variants); v++) {
nir_shader *variant_nir = nir_shader_clone(NULL, nir_s);
if (corm_variants[v].late_scalar) {
NIR_PASS(_, variant_nir, nir_lower_alu_to_scalar, NULL, NULL);
NIR_PASS(_, variant_nir, nir_opt_copy_prop);
NIR_PASS(_, variant_nir, nir_opt_algebraic);
NIR_PASS(_, variant_nir, nir_opt_dce);
nir_index_ssa_defs(nir_shader_get_entrypoint(variant_nir));
}
memset(&nir_results[v], 0, sizeof(nir_results[v]));
i915_populate_fs_metadata(&nir_results[v], variant_nir);
i915_translate_fragment_program_nir(i915, &nir_results[v],
variant_nir, &corm_variants[v]);
ralloc_free(variant_nir);
bool ok = !nir_results[v].error || !nir_results[v].error[0];
if (ok && (best_nir < 0 ||
corm_fs_better(&nir_results[v], &nir_results[best_nir])))
best_nir = v;
}
ralloc_free(nir_s);
}
if (try_tgsi) {
i915_compile_tgsi(i915, &tgsi_fs, pipe->screen, s);
} else {
ralloc_free(s);
}
bool nir_ok = best_nir >= 0;
bool tgsi_ok = try_tgsi && (!tgsi_fs.error || !tgsi_fs.error[0]);
struct i915_fragment_shader *best_nir_fs = nir_ok ? &nir_results[best_nir] : NULL;
bool use_nir;
if (nir_ok && tgsi_ok)
use_nir = !corm_fs_better(&tgsi_fs, best_nir_fs);
else
use_nir = nir_ok;
if (debug && try_nir && try_tgsi) {
for (unsigned v = 0; v < ARRAY_SIZE(corm_variants); v++) {
bool ok = !nir_results[v].error || !nir_results[v].error[0];
mesa_logi(" NIR[dc=%d,ss=%d]: %s (%d ALU, %d phase, %d temps)%s",
corm_variants[v].deferred_const,
corm_variants[v].seq_sne_opt,
ok ? "ok" : "FAIL",
ok ? nir_results[v].nr_alu_insn : 0,
ok ? nir_results[v].nr_tex_indirect : 0,
ok ? nir_results[v].nr_temps : 0,
(int)v == best_nir ? " *" : "");
}
mesa_logi(" TGSI: %s (%d ALU, %d phase, %d temps)",
tgsi_ok ? "ok" : "FAIL",
tgsi_ok ? tgsi_fs.nr_alu_insn : 0,
tgsi_ok ? tgsi_fs.nr_tex_indirect : 0,
tgsi_ok ? tgsi_fs.nr_temps : 0);
mesa_logi(" -> %s%s", use_nir ? "NIR" : "TGSI",
use_nir ? (corm_fs_better(best_nir_fs, &tgsi_fs)
? " (better)" : " (tied)") : "");
}
/* Free non-winning NIR variants */
if (try_nir) {
for (unsigned v = 0; v < ARRAY_SIZE(corm_variants); v++) {
if ((int)v != best_nir) {
FREE(nir_results[v].program);
ralloc_free(nir_results[v].error);
}
}
}
struct i915_fragment_shader *winner, *loser = NULL;
struct i915_fragment_shader nir_loser_copy = {0};
if (use_nir) {
winner = best_nir_fs;
loser = tgsi_ok ? &tgsi_fs : NULL;
} else {
winner = &tgsi_fs;
if (best_nir_fs) {
nir_loser_copy = *best_nir_fs;
nir_loser_copy.program = NULL;
loser = &nir_loser_copy;
FREE(best_nir_fs->program);
ralloc_free(best_nir_fs->error);
}
}
if (i915 && !ifs->internal) {
bool neither = (winner->nr_alu_insn + winner->nr_tex_insn) == 0;
char reason[32];
if (neither)
snprintf(reason, sizeof(reason), "neither");
else
corm_win_reason(winner, loser, reason, sizeof(reason));
util_debug_message(
&i915->debug, SHADER_INFO,
"%s shader [%s, %s]: %d instructions, %d alu, %d tex, "
"%d tex_indirect, %d temps, %d const",
_mesa_shader_stage_to_abbrev(MESA_SHADER_FRAGMENT),
neither ? "FAIL" : use_nir ? "NIR" : "TGSI", reason,
winner->nr_alu_insn + winner->nr_tex_insn,
winner->nr_alu_insn, winner->nr_tex_insn, winner->nr_tex_indirect,
winner->nr_temps, winner->num_constants);
}
ifs->program = winner->program;
ifs->program_len = winner->program_len;
ifs->nr_alu_insn = winner->nr_alu_insn;
ifs->nr_tex_insn = winner->nr_tex_insn;
ifs->nr_tex_indirect = winner->nr_tex_indirect;
ifs->nr_temps = winner->nr_temps;
ifs->num_constants = winner->num_constants;
memcpy(ifs->constants, winner->constants, sizeof(ifs->constants));
memcpy(ifs->constant_flags, winner->constant_flags,
sizeof(ifs->constant_flags));
memcpy(ifs->texcoords, winner->texcoords, sizeof(ifs->texcoords));
ifs->reads_pntc = winner->reads_pntc;
ifs->writes_z = winner->writes_z;
ifs->num_inputs = winner->num_inputs;
memcpy(ifs->input_semantic_name, winner->input_semantic_name,
sizeof(ifs->input_semantic_name));
memcpy(ifs->input_semantic_index, winner->input_semantic_index,
sizeof(ifs->input_semantic_index));
if (winner->error)
ifs->error = winner->error;
/* The loser's info may be in use (TGSI path populates ifs->info) */
if (try_tgsi)
ifs->info = tgsi_fs.info;
if (loser) {
FREE(loser->program);
ralloc_free(loser->error);
}
if (!use_nir && try_tgsi) {
/* TGSI won — tokens are in tgsi_fs via i915_compile_tgsi.
* We need them for ifs->state for draw's FS pipeline. */
ifs->state = tgsi_fs.state;
} else if (try_tgsi) {
FREE((void *)tgsi_fs.state.tokens);
}
/* The shader's compiled to i915 instructions here */
i915_translate_fragment_program(i915, ifs);
if (ifs->error && templ->report_compile_error) {
((struct pipe_shader_state *)templ)->error_message = strdup(ifs->error);
i915_delete_fs_state(NULL, ifs);
@ -667,28 +971,11 @@ i915_create_vs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
{
struct i915_context *i915 = i915_context(pipe);
void *vertex_shader;
struct pipe_shader_state from_nir = {PIPE_SHADER_IR_TGSI};
if (templ->type == PIPE_SHADER_IR_NIR) {
nir_shader *s = templ->ir.nir;
if (templ->type == PIPE_SHADER_IR_NIR)
NIR_PASS(_, templ->ir.nir, nir_lower_point_size, 1.0, 255.0);
NIR_PASS(_, s, nir_lower_point_size, 1.0, 255.0);
/* The gallivm draw path doesn't support non-native-integers NIR shaders,
* st/mesa does native-integers for the screen as a whole rather than
* per-stage, and i915 FS can't do native integers. So, convert to TGSI,
* where the draw path *does* support non-native-integers.
*/
from_nir.tokens = nir_to_tgsi(s, pipe->screen);
templ = &from_nir;
}
vertex_shader = draw_create_vertex_shader(i915->draw, templ);
FREE((void *)from_nir.tokens);
return vertex_shader;
return draw_create_vertex_shader(i915->draw, templ);
}
static void

View file

@ -332,28 +332,33 @@ emit_constants(struct i915_context *i915)
OUT_BATCH((1 << nr) - 1);
for (i = 0; i < nr; i++) {
const uint32_t *c;
if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
/* grab user-defined constant */
c = (uint32_t *)i915_buffer(i915->constants[MESA_SHADER_FRAGMENT])
->data;
uint8_t flags = i915->fs->constant_flags[i];
uint8_t user_mask = flags >> 4;
if (!user_mask) {
const uint32_t *c = (uint32_t *)i915->fs->constants[i];
OUT_BATCH(c[0]);
OUT_BATCH(c[1]);
OUT_BATCH(c[2]);
OUT_BATCH(c[3]);
} else if (user_mask == 0xf) {
const uint32_t *c =
(uint32_t *)i915_buffer(i915->constants[MESA_SHADER_FRAGMENT])
->data;
c += 4 * i;
OUT_BATCH(c[0]);
OUT_BATCH(c[1]);
OUT_BATCH(c[2]);
OUT_BATCH(c[3]);
} else {
/* emit program constant */
c = (uint32_t *)i915->fs->constants[i];
const uint32_t *user =
(uint32_t *)i915_buffer(i915->constants[MESA_SHADER_FRAGMENT])
->data;
user += 4 * i;
const uint32_t *imm = (uint32_t *)i915->fs->constants[i];
for (unsigned ch = 0; ch < 4; ch++)
OUT_BATCH((user_mask & (1 << ch)) ? user[ch] : imm[ch]);
}
#if 0 /* debug */
{
float *f = (float *) c;
printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
(i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
? "user" : "immediate"));
}
#endif
OUT_BATCH(*c++);
OUT_BATCH(*c++);
OUT_BATCH(*c++);
OUT_BATCH(*c++);
}
}
}

View file

@ -16,6 +16,7 @@ files_i915 = files(
'i915_flush.c',
'i915_fpc_emit.c',
'i915_fpc.h',
'i915_fpc_nir.c',
'i915_fpc_optimize.c',
'i915_fpc_translate.c',
'i915_prim_emit.c',