mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 02:38:04 +02:00
Merge branch 'i915-nir-backend' into 'main'
Draft: i915: s/tgsi/nir/ See merge request mesa/mesa!41371
This commit is contained in:
commit
52a3df7f76
10 changed files with 1853 additions and 169 deletions
|
|
@ -88,8 +88,15 @@ struct i915_winsys_batchbuffer;
|
|||
|
||||
#define I915_MAX_CONSTANT 32
|
||||
|
||||
/** See constant_flags[] below */
|
||||
#define I915_CONSTFLAG_USER 0x1f
|
||||
/**
|
||||
* Per-channel flags for constant_flags[].
|
||||
* Bits 0-3: channel has a compiler immediate.
|
||||
* Bits 4-7: channel has a user (UBO) value uploaded at draw time.
|
||||
* A channel is available when neither bit is set.
|
||||
*/
|
||||
#define I915_CONSTFLAG_IMM(ch) (1 << (ch))
|
||||
#define I915_CONSTFLAG_USER_CH(ch) (1 << ((ch) + 4))
|
||||
#define I915_CONSTFLAG_USER 0xf0
|
||||
|
||||
/**
|
||||
* Subclass of pipe_shader_state
|
||||
|
|
@ -103,6 +110,10 @@ struct i915_fragment_shader {
|
|||
|
||||
uint32_t *program;
|
||||
uint32_t program_len;
|
||||
uint32_t nr_alu_insn;
|
||||
uint32_t nr_tex_insn;
|
||||
uint32_t nr_tex_indirect;
|
||||
uint32_t nr_temps;
|
||||
|
||||
/**
|
||||
* constants introduced during translation.
|
||||
|
|
@ -134,12 +145,15 @@ struct i915_fragment_shader {
|
|||
} texcoords[I915_TEX_UNITS];
|
||||
|
||||
bool reads_pntc;
|
||||
bool writes_z;
|
||||
|
||||
unsigned num_inputs;
|
||||
uint8_t input_semantic_name[PIPE_MAX_SHADER_INPUTS];
|
||||
uint8_t input_semantic_index[PIPE_MAX_SHADER_INPUTS];
|
||||
|
||||
/* Set if the shader is an internal (blit, etc.) shader that shouldn't debug
|
||||
* log by default. */
|
||||
bool internal;
|
||||
|
||||
char *error; /* Any error message from compiling this shader (or NULL) */
|
||||
char *error;
|
||||
};
|
||||
|
||||
struct i915_cache_context;
|
||||
|
|
|
|||
|
|
@ -136,6 +136,15 @@ swizzle(int reg, uint32_t x, uint32_t y, uint32_t z, uint32_t w)
|
|||
CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3));
|
||||
}
|
||||
|
||||
static inline int
|
||||
negate(int reg, int x, int y, int z, int w)
|
||||
{
|
||||
return reg ^ (x << UREG_CHANNEL_X_NEGATE_SHIFT |
|
||||
y << UREG_CHANNEL_Y_NEGATE_SHIFT |
|
||||
z << UREG_CHANNEL_Z_NEGATE_SHIFT |
|
||||
w << UREG_CHANNEL_W_NEGATE_SHIFT);
|
||||
}
|
||||
|
||||
#define A0_DEST(reg) (((reg)&UREG_TYPE_NR_MASK) >> UREG_A0_DEST_SHIFT_LEFT)
|
||||
#define D0_DEST(reg) (((reg)&UREG_TYPE_NR_MASK) >> UREG_A0_DEST_SHIFT_LEFT)
|
||||
#define T0_DEST(reg) (((reg)&UREG_TYPE_NR_MASK) >> UREG_A0_DEST_SHIFT_LEFT)
|
||||
|
|
@ -173,8 +182,21 @@ swizzle(int reg, uint32_t x, uint32_t y, uint32_t z, uint32_t w)
|
|||
*/
|
||||
extern void i915_translate_fragment_program(struct i915_context *i915,
|
||||
struct i915_fragment_shader *fs);
|
||||
/**
 * Per-variant knobs for the NIR fragment program backend.
 *
 * i915_create_fs_state() compiles the same shader once per combination of
 * these flags and keeps the cheapest result.
 */
struct corm_compile_opts {
   /* NOTE(review): consumed by i915_translate_fragment_program_nir(); the
    * exact effect is not visible from this header -- confirm and document. */
   bool deferred_const;

   /* NOTE(review): presumably toggles a seq/sne-specific optimisation in the
    * NIR backend -- confirm against i915_fpc_nir.c. */
   bool seq_sne_opt;

   /* When set, the caller fully scalarizes the ALU instructions of the
    * variant's NIR (followed by copy-prop/algebraic/DCE) before translating
    * it. */
   bool late_scalar;
};
|
||||
|
||||
extern void i915_translate_fragment_program_nir(struct i915_context *i915,
|
||||
struct i915_fragment_shader *ifs,
|
||||
struct nir_shader *s,
|
||||
const struct corm_compile_opts *opts);
|
||||
extern void i915_use_passthrough_shader(struct i915_fragment_shader *fs);
|
||||
extern void i915_program_error(struct i915_fp_compile *p, const char *msg, ...);
|
||||
|
||||
extern uint32_t i915_get_temp(struct i915_fp_compile *p);
|
||||
extern void i915_release_temp(struct i915_fp_compile *p, int reg);
|
||||
extern uint32_t i915_get_utemp(struct i915_fp_compile *p);
|
||||
extern void i915_release_utemps(struct i915_fp_compile *p);
|
||||
|
||||
|
|
@ -191,6 +213,8 @@ extern uint32_t i915_emit_decl(struct i915_fp_compile *p, uint32_t type,
|
|||
uint32_t nr, uint32_t d0_flags);
|
||||
|
||||
extern uint32_t i915_emit_const1f(struct i915_fp_compile *p, float c0);
|
||||
extern uint32_t i915_emit_const1f_prefer(struct i915_fp_compile *p, float c0,
|
||||
int preferred_reg);
|
||||
|
||||
extern uint32_t i915_emit_const2f(struct i915_fp_compile *p, float c0,
|
||||
float c1);
|
||||
|
|
|
|||
|
|
@ -25,11 +25,45 @@
|
|||
*
|
||||
**************************************************************************/
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "util/ralloc.h"
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "i915_context.h"
|
||||
#include "i915_fpc.h"
|
||||
#include "i915_reg.h"
|
||||
|
||||
void
|
||||
i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
|
||||
{
|
||||
va_list args;
|
||||
va_start(args, msg);
|
||||
ralloc_vasprintf_append(&p->error, msg, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
static const unsigned passthrough_program[] = {
|
||||
_3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1),
|
||||
(A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL |
|
||||
(REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)),
|
||||
((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) |
|
||||
(SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) |
|
||||
(SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) |
|
||||
(SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)),
|
||||
0};
|
||||
|
||||
void
|
||||
i915_use_passthrough_shader(struct i915_fragment_shader *fs)
|
||||
{
|
||||
fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program));
|
||||
if (fs->program) {
|
||||
memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
|
||||
fs->program_len = ARRAY_SIZE(passthrough_program);
|
||||
}
|
||||
fs->num_constants = 0;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
i915_get_temp(struct i915_fp_compile *p)
|
||||
{
|
||||
|
|
@ -43,7 +77,7 @@ i915_get_temp(struct i915_fp_compile *p)
|
|||
return bit - 1;
|
||||
}
|
||||
|
||||
static void
|
||||
void
|
||||
i915_release_temp(struct i915_fp_compile *p, int reg)
|
||||
{
|
||||
p->temp_flag &= ~(1 << reg);
|
||||
|
|
@ -179,8 +213,6 @@ i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask,
|
|||
{
|
||||
const uint32_t k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord));
|
||||
|
||||
int temp = -1;
|
||||
|
||||
uint32_t coord_used = 0xf << UREG_CHANNEL_X_SHIFT;
|
||||
if (coord_mask & TGSI_WRITEMASK_Y)
|
||||
coord_used |= 0xf << UREG_CHANNEL_Y_SHIFT;
|
||||
|
|
@ -191,13 +223,10 @@ i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask,
|
|||
|
||||
if ((coord & coord_used) != (k & coord_used) ||
|
||||
GET_UREG_TYPE(coord) == REG_TYPE_CONST) {
|
||||
/* texcoord is swizzled or negated. Need to allocate a new temporary
|
||||
* register (a utemp / unpreserved temp) won't do.
|
||||
/* texcoord is swizzled or negated. Need a temporary to hold it.
|
||||
* Use a utemp so it doesn't create a tex indirect phase boundary.
|
||||
*/
|
||||
uint32_t tempReg;
|
||||
|
||||
temp = i915_get_temp(p); /* get temp reg index */
|
||||
tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */
|
||||
uint32_t tempReg = i915_get_utemp(p);
|
||||
|
||||
i915_emit_arith(p, A0_MOV, tempReg,
|
||||
A0_DEST_CHANNEL_ALL, /* dest reg, writemask */
|
||||
|
|
@ -227,11 +256,21 @@ i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask,
|
|||
p->nr_tex_indirect++;
|
||||
|
||||
/* Reading from an r# register whose contents depend on output of the
|
||||
* current phase defines a phase boundary.
|
||||
* current phase defines a phase boundary. Prefer just bumping the
|
||||
* phase count (free), but if we'd exceed the HW limit, copy to a
|
||||
* utemp instead (costs 1 ALU instruction).
|
||||
*/
|
||||
if (GET_UREG_TYPE(coord) == REG_TYPE_R &&
|
||||
p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect)
|
||||
p->nr_tex_indirect++;
|
||||
p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect) {
|
||||
if (p->nr_tex_indirect + 1 < I915_MAX_TEX_INDIRECT) {
|
||||
p->nr_tex_indirect++;
|
||||
} else {
|
||||
uint32_t tmp = i915_get_utemp(p);
|
||||
i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
|
||||
coord, 0, 0);
|
||||
coord = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
if (p->csr < p->program + I915_PROGRAM_SIZE) {
|
||||
*(p->csr++) = (opcode | T0_DEST(dest) | T0_SAMPLER(sampler));
|
||||
|
|
@ -246,40 +285,75 @@ i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask,
|
|||
p->nr_tex_insn++;
|
||||
}
|
||||
|
||||
if (temp >= 0)
|
||||
i915_release_temp(p, temp);
|
||||
|
||||
return dest;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
i915_try_const1f_in_reg(struct i915_fp_compile *p, float c0, unsigned reg)
|
||||
{
|
||||
struct i915_fragment_shader *ifs = p->shader;
|
||||
|
||||
for (unsigned idx = 0; idx < 4; idx++) {
|
||||
if (ifs->constant_flags[reg] & I915_CONSTFLAG_USER_CH(idx))
|
||||
continue;
|
||||
if (!(ifs->constant_flags[reg] & I915_CONSTFLAG_IMM(idx)) ||
|
||||
ifs->constants[reg][idx] == c0) {
|
||||
ifs->constants[reg][idx] = c0;
|
||||
ifs->constant_flags[reg] |= I915_CONSTFLAG_IMM(idx);
|
||||
if (reg + 1 > ifs->num_constants)
|
||||
ifs->num_constants = reg + 1;
|
||||
return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);
|
||||
}
|
||||
}
|
||||
return UREG_BAD;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
i915_try_emit_const1f(struct i915_fp_compile *p, float c0, int preferred_reg)
|
||||
{
|
||||
if (preferred_reg >= 0) {
|
||||
uint32_t r = i915_try_const1f_in_reg(p, c0, preferred_reg);
|
||||
if (r != UREG_BAD)
|
||||
return r;
|
||||
}
|
||||
|
||||
for (unsigned reg = 0; reg < I915_MAX_CONSTANT; reg++) {
|
||||
uint32_t r = i915_try_const1f_in_reg(p, c0, reg);
|
||||
if (r != UREG_BAD)
|
||||
return r;
|
||||
}
|
||||
|
||||
i915_program_error(p, "i915_emit_const1f: out of constants");
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
i915_emit_const1f(struct i915_fp_compile *p, float c0)
|
||||
{
|
||||
struct i915_fragment_shader *ifs = p->shader;
|
||||
unsigned reg, idx;
|
||||
|
||||
if (c0 == 0.0)
|
||||
return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
|
||||
if (c0 == 1.0)
|
||||
return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);
|
||||
if (c0 == -1.0)
|
||||
return negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE),
|
||||
1, 1, 1, 1);
|
||||
|
||||
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
|
||||
if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
|
||||
continue;
|
||||
for (idx = 0; idx < 4; idx++) {
|
||||
if (!(ifs->constant_flags[reg] & (1 << idx)) ||
|
||||
ifs->constants[reg][idx] == c0) {
|
||||
ifs->constants[reg][idx] = c0;
|
||||
ifs->constant_flags[reg] |= 1 << idx;
|
||||
if (reg + 1 > ifs->num_constants)
|
||||
ifs->num_constants = reg + 1;
|
||||
return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);
|
||||
}
|
||||
}
|
||||
}
|
||||
return i915_try_emit_const1f(p, c0, -1);
|
||||
}
|
||||
|
||||
i915_program_error(p, "i915_emit_const1f: out of constants");
|
||||
return 0;
|
||||
uint32_t
|
||||
i915_emit_const1f_prefer(struct i915_fp_compile *p, float c0,
|
||||
int preferred_reg)
|
||||
{
|
||||
if (c0 == 0.0)
|
||||
return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
|
||||
if (c0 == 1.0)
|
||||
return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);
|
||||
if (c0 == -1.0)
|
||||
return negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE),
|
||||
1, 1, 1, 1);
|
||||
|
||||
return i915_try_emit_const1f(p, c0, preferred_reg);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
|
|
@ -301,14 +375,15 @@ i915_emit_const2f(struct i915_fp_compile *p, float c0, float c1)
|
|||
// XXX emit swizzle here for 0, 1, -1 and any combination thereof
|
||||
// we can use swizzle + neg for that
|
||||
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
|
||||
if (ifs->constant_flags[reg] == 0xf ||
|
||||
ifs->constant_flags[reg] == I915_CONSTFLAG_USER)
|
||||
uint8_t occupied = (ifs->constant_flags[reg] & 0xf) |
|
||||
(ifs->constant_flags[reg] >> 4);
|
||||
if (occupied == 0xf)
|
||||
continue;
|
||||
for (idx = 0; idx < 3; idx++) {
|
||||
if (!(ifs->constant_flags[reg] & (3 << idx))) {
|
||||
if (!(occupied & (3 << idx))) {
|
||||
ifs->constants[reg][idx + 0] = c0;
|
||||
ifs->constants[reg][idx + 1] = c1;
|
||||
ifs->constant_flags[reg] |= 3 << idx;
|
||||
ifs->constant_flags[reg] |= (3 << idx); /* immediate bits */
|
||||
if (reg + 1 > ifs->num_constants)
|
||||
ifs->num_constants = reg + 1;
|
||||
return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE);
|
||||
|
|
@ -330,9 +405,9 @@ i915_emit_const4f(struct i915_fp_compile *p, float c0, float c1, float c2,
|
|||
// XXX emit swizzle here for 0, 1, -1 and any combination thereof
|
||||
// we can use swizzle + neg for that
|
||||
for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {
|
||||
if (ifs->constant_flags[reg] == 0xf && ifs->constants[reg][0] == c0 &&
|
||||
ifs->constants[reg][1] == c1 && ifs->constants[reg][2] == c2 &&
|
||||
ifs->constants[reg][3] == c3) {
|
||||
if ((ifs->constant_flags[reg] & 0x0f) == 0x0f &&
|
||||
ifs->constants[reg][0] == c0 && ifs->constants[reg][1] == c1 &&
|
||||
ifs->constants[reg][2] == c2 && ifs->constants[reg][3] == c3) {
|
||||
return UREG(REG_TYPE_CONST, reg);
|
||||
} else if (ifs->constant_flags[reg] == 0) {
|
||||
|
||||
|
|
@ -340,7 +415,7 @@ i915_emit_const4f(struct i915_fp_compile *p, float c0, float c1, float c2,
|
|||
ifs->constants[reg][1] = c1;
|
||||
ifs->constants[reg][2] = c2;
|
||||
ifs->constants[reg][3] = c3;
|
||||
ifs->constant_flags[reg] = 0xf;
|
||||
ifs->constant_flags[reg] = 0x0f;
|
||||
if (reg + 1 > ifs->num_constants)
|
||||
ifs->num_constants = reg + 1;
|
||||
return UREG(REG_TYPE_CONST, reg);
|
||||
|
|
|
|||
1310
src/gallium/drivers/i915/i915_fpc_nir.c
Normal file
1310
src/gallium/drivers/i915/i915_fpc_nir.c
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -405,6 +405,8 @@ i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
|
|||
target_is_texture2d(next->FullInstruction.Texture.Texture) &&
|
||||
same_src_dst_reg(&next->FullInstruction.Src[0],
|
||||
&current->FullInstruction.Dst[0]) &&
|
||||
(current->FullInstruction.Dst[0].Register.WriteMask &
|
||||
i915_tex_mask(next)) == i915_tex_mask(next) &&
|
||||
is_unswizzled(&current->FullInstruction.Src[0], i915_tex_mask(next)) &&
|
||||
unused_from(ctx, &current->FullInstruction.Dst[0], index)) {
|
||||
memcpy(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0],
|
||||
|
|
|
|||
|
|
@ -54,55 +54,9 @@
|
|||
* Simple pass-through fragment shader to use when we don't have
|
||||
* a real shader (or it fails to compile for some reason).
|
||||
*/
|
||||
static unsigned passthrough_program[] = {
|
||||
_3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1),
|
||||
/* move to output color:
|
||||
*/
|
||||
(A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL |
|
||||
(REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)),
|
||||
((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) |
|
||||
(SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) |
|
||||
(SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) |
|
||||
(SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)),
|
||||
0};
|
||||
|
||||
/**
|
||||
* component-wise negation of ureg
|
||||
*/
|
||||
static inline int
|
||||
negate(int reg, int x, int y, int z, int w)
|
||||
{
|
||||
/* Another neat thing about the UREG representation */
|
||||
return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
|
||||
((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
|
||||
((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
|
||||
((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
|
||||
}
|
||||
|
||||
/**
|
||||
* In the event of a translation failure, we'll generate a simple color
|
||||
* pass-through program.
|
||||
*/
|
||||
static void
|
||||
i915_use_passthrough_shader(struct i915_fragment_shader *fs)
|
||||
{
|
||||
fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program));
|
||||
if (fs->program) {
|
||||
memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
|
||||
fs->program_len = ARRAY_SIZE(passthrough_program);
|
||||
}
|
||||
fs->num_constants = 0;
|
||||
}
|
||||
|
||||
void
|
||||
i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
|
||||
{
|
||||
va_list args;
|
||||
va_start(args, msg);
|
||||
ralloc_vasprintf_append(&p->error, msg, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
get_mapping(struct i915_fragment_shader *fs, enum tgsi_semantic semantic,
|
||||
int index)
|
||||
|
|
@ -1006,12 +960,11 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
|
|||
p->nr_decl_insn, I915_MAX_DECL_INSN);
|
||||
}
|
||||
|
||||
/* hw doesn't seem to like empty frag programs (num_instructions == 1 is just
|
||||
* TGSI_END), even when the depth write fixup gets emitted below - maybe that
|
||||
* one is fishy, too?
|
||||
*/
|
||||
if (ifs->info.num_instructions == 1)
|
||||
i915_program_error(p, "Empty fragment shader");
|
||||
if (ifs->info.num_instructions == 1) {
|
||||
i915_use_passthrough_shader(ifs);
|
||||
ifs->nr_alu_insn = 1;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (strlen(p->error) != 0) {
|
||||
i915_use_passthrough_shader(ifs);
|
||||
|
|
@ -1024,6 +977,10 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
|
|||
assert(!ifs->program);
|
||||
|
||||
ifs->program_len = decl_size + program_size;
|
||||
ifs->nr_alu_insn = p->nr_alu_insn;
|
||||
ifs->nr_tex_insn = p->nr_tex_insn;
|
||||
ifs->nr_tex_indirect = p->nr_tex_indirect;
|
||||
ifs->nr_temps = util_bitcount(p->temp_flag);
|
||||
ifs->program = (uint32_t *)MALLOC(ifs->program_len * sizeof(uint32_t));
|
||||
memcpy(ifs->program, p->declarations, decl_size * sizeof(uint32_t));
|
||||
memcpy(&ifs->program[decl_size], p->program,
|
||||
|
|
@ -1032,14 +989,16 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
|
|||
if (i915) {
|
||||
util_debug_message(
|
||||
&i915->debug, SHADER_INFO,
|
||||
"%s shader: %d inst, %d tex, %d tex_indirect, %d temps, %d const",
|
||||
"%s shader: %d instructions, %d alu, %d tex, %d tex_indirect, "
|
||||
"%d temps, %d const",
|
||||
_mesa_shader_stage_to_abbrev(MESA_SHADER_FRAGMENT),
|
||||
(int)program_size, p->nr_tex_insn, p->nr_tex_indirect,
|
||||
p->shader->info.file_max[TGSI_FILE_TEMPORARY] + 1,
|
||||
ifs->num_constants);
|
||||
ifs->nr_alu_insn + ifs->nr_tex_insn,
|
||||
ifs->nr_alu_insn, ifs->nr_tex_insn, ifs->nr_tex_indirect,
|
||||
ifs->nr_temps, ifs->num_constants);
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
if (strlen(p->error) != 0)
|
||||
ifs->error = p->error;
|
||||
else
|
||||
|
|
|
|||
|
|
@ -176,6 +176,8 @@ i915_optimize_nir(struct nir_shader *s)
|
|||
{
|
||||
bool progress;
|
||||
|
||||
NIR_PASS(_, s, nir_lower_int_to_float);
|
||||
|
||||
do {
|
||||
progress = false;
|
||||
|
||||
|
|
@ -212,6 +214,11 @@ i915_optimize_nir(struct nir_shader *s)
|
|||
|
||||
} while (progress);
|
||||
|
||||
NIR_PASS(_, s, nir_lower_alu_to_scalar, NULL, NULL);
|
||||
NIR_PASS(_, s, nir_lower_bool_to_float, false);
|
||||
NIR_PASS(_, s, nir_opt_algebraic);
|
||||
NIR_PASS(_, s, nir_opt_dce);
|
||||
|
||||
NIR_PASS(progress, s, nir_remove_dead_variables, nir_var_function_temp,
|
||||
NULL);
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,9 @@
|
|||
#include "compiler/nir/nir_builder.h"
|
||||
#include "draw/draw_context.h"
|
||||
#include "nir/nir_to_tgsi.h"
|
||||
#include "tgsi/tgsi_from_mesa.h"
|
||||
#include "tgsi/tgsi_parse.h"
|
||||
#include "tgsi/tgsi_scan.h"
|
||||
#include "util/u_helpers.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_math.h"
|
||||
|
|
@ -542,6 +544,37 @@ static const struct nir_to_tgsi_options ntt_options = {
|
|||
.lower_fabs = true,
|
||||
};
|
||||
|
||||
/**
 * Size callback handed to nir_lower_io: the number of attribute slots that
 * \p type occupies.  The \p bindless argument is ignored.
 */
static int
type_size(const struct glsl_type *type, bool bindless)
{
   const int slots = glsl_count_attribute_slots(type, false);

   return slots;
}
|
||||
|
||||
static bool
|
||||
scalarize_vector_bools(const nir_instr *instr, const void *data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_alu)
|
||||
return false;
|
||||
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
||||
return alu->op == nir_op_bcsel ||
|
||||
alu->op == nir_op_fcsel_ge ||
|
||||
alu->op == nir_op_fcsel_gt;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_fsqrt_filter(const nir_instr *instr, UNUSED const void *data)
|
||||
{
|
||||
return instr->type == nir_instr_type_alu &&
|
||||
nir_instr_as_alu(instr)->op == nir_op_fsqrt;
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
lower_fsqrt_impl(nir_builder *b, nir_instr *instr, UNUSED void *data)
|
||||
{
|
||||
nir_def *src = nir_instr_as_alu(instr)->src[0].src.ssa;
|
||||
return nir_fmul(b, src, nir_frsq(b, src));
|
||||
}
|
||||
|
||||
static char *
|
||||
i915_check_control_flow(nir_shader *s)
|
||||
{
|
||||
|
|
@ -565,6 +598,94 @@ i915_check_control_flow(nir_shader *s)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * Which fragment shader compiler(s) to run, selected through the I915_FS
 * environment option.
 */
enum i915_fs_mode {
   I915_FS_TGSI = 0, /* I915_FS=tgsi: TGSI translator only */
   I915_FS_NIR = 1,  /* I915_FS=nir: NIR backend only */
   I915_FS_BOTH = 2, /* any other value (default "both"): run both */
};
|
||||
|
||||
static enum i915_fs_mode
|
||||
i915_get_fs_mode(void)
|
||||
{
|
||||
const char *env = debug_get_option("I915_FS", "both");
|
||||
if (!strcmp(env, "tgsi"))
|
||||
return I915_FS_TGSI;
|
||||
if (!strcmp(env, "nir"))
|
||||
return I915_FS_NIR;
|
||||
return I915_FS_BOTH;
|
||||
}
|
||||
|
||||
static void
|
||||
i915_populate_fs_metadata(struct i915_fragment_shader *ifs, nir_shader *s)
|
||||
{
|
||||
ifs->num_inputs = 0;
|
||||
ifs->writes_z = s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH);
|
||||
|
||||
nir_foreach_shader_in_variable(var, s) {
|
||||
unsigned sem_name, sem_index;
|
||||
tgsi_get_gl_varying_semantic((gl_varying_slot)var->data.location, true,
|
||||
&sem_name, &sem_index);
|
||||
unsigned idx = ifs->num_inputs++;
|
||||
ifs->input_semantic_name[idx] = sem_name;
|
||||
ifs->input_semantic_index[idx] = sem_index;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
i915_compile_tgsi(struct i915_context *i915,
|
||||
struct i915_fragment_shader *ifs,
|
||||
struct pipe_screen *screen,
|
||||
nir_shader *nir_clone)
|
||||
{
|
||||
ifs->state.tokens = nir_to_tgsi_options(nir_clone, screen, &ntt_options);
|
||||
ifs->state.type = PIPE_SHADER_IR_TGSI;
|
||||
tgsi_scan_shader(ifs->state.tokens, &ifs->info);
|
||||
i915_translate_fragment_program(i915, ifs);
|
||||
}
|
||||
|
||||
static bool
|
||||
corm_fs_better(const struct i915_fragment_shader *a,
|
||||
const struct i915_fragment_shader *b)
|
||||
{
|
||||
if (a->nr_tex_indirect != b->nr_tex_indirect)
|
||||
return a->nr_tex_indirect < b->nr_tex_indirect;
|
||||
if (a->nr_alu_insn != b->nr_alu_insn)
|
||||
return a->nr_alu_insn < b->nr_alu_insn;
|
||||
if (a->nr_temps != b->nr_temps)
|
||||
return a->nr_temps < b->nr_temps;
|
||||
return a->num_constants < b->num_constants;
|
||||
}
|
||||
|
||||
static const char *
|
||||
corm_win_reason(const struct i915_fragment_shader *winner,
|
||||
const struct i915_fragment_shader *loser,
|
||||
char *buf, size_t len)
|
||||
{
|
||||
if (!loser) {
|
||||
snprintf(buf, len, "only");
|
||||
return buf;
|
||||
}
|
||||
int da = (int)winner->nr_alu_insn - (int)loser->nr_alu_insn;
|
||||
int dp = (int)winner->nr_tex_indirect - (int)loser->nr_tex_indirect;
|
||||
int dt = (int)winner->nr_temps - (int)loser->nr_temps;
|
||||
if (dp != 0)
|
||||
snprintf(buf, len, "%+d phase", dp);
|
||||
else if (da != 0)
|
||||
snprintf(buf, len, "%+d alu", da);
|
||||
else if (dt != 0)
|
||||
snprintf(buf, len, "%+d temps", dt);
|
||||
else if ((int)winner->num_constants != (int)loser->num_constants)
|
||||
snprintf(buf, len, "%+d const",
|
||||
(int)winner->num_constants - (int)loser->num_constants);
|
||||
else if (winner->program_len == loser->program_len &&
|
||||
!memcmp(winner->program, loser->program,
|
||||
winner->program_len * sizeof(uint32_t)))
|
||||
snprintf(buf, len, "identical");
|
||||
else
|
||||
snprintf(buf, len, "tied");
|
||||
return buf;
|
||||
}
|
||||
|
||||
static void *
|
||||
i915_create_fs_state(struct pipe_context *pipe,
|
||||
const struct pipe_shader_state *templ)
|
||||
|
|
@ -576,39 +697,222 @@ i915_create_fs_state(struct pipe_context *pipe,
|
|||
|
||||
ifs->draw_data = draw_create_fragment_shader(i915->draw, templ);
|
||||
|
||||
if (templ->type == PIPE_SHADER_IR_NIR) {
|
||||
nir_shader *s = templ->ir.nir;
|
||||
ifs->internal = s->info.internal;
|
||||
|
||||
char *msg = i915_check_control_flow(s);
|
||||
if (msg) {
|
||||
if (I915_DBG_ON(DBG_FS) &&
|
||||
(!s->info.internal || NIR_DEBUG(PRINT_INTERNAL))) {
|
||||
mesa_logi("failing shader:");
|
||||
nir_log_shaderi(s);
|
||||
}
|
||||
if (templ->report_compile_error) {
|
||||
((struct pipe_shader_state *)templ)->error_message = strdup(msg);
|
||||
ralloc_free(s);
|
||||
i915_delete_fs_state(NULL, ifs);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
ifs->state.tokens = nir_to_tgsi_options(s, pipe->screen, &ntt_options);
|
||||
} else {
|
||||
assert(templ->type == PIPE_SHADER_IR_TGSI);
|
||||
/* we need to keep a local copy of the tokens */
|
||||
if (templ->type == PIPE_SHADER_IR_TGSI) {
|
||||
ifs->state.tokens = tgsi_dup_tokens(templ->tokens);
|
||||
ifs->state.type = PIPE_SHADER_IR_TGSI;
|
||||
ifs->internal = i915->no_log_program_errors;
|
||||
tgsi_scan_shader(ifs->state.tokens, &ifs->info);
|
||||
i915_translate_fragment_program(i915, ifs);
|
||||
return ifs;
|
||||
}
|
||||
|
||||
ifs->state.type = PIPE_SHADER_IR_TGSI;
|
||||
assert(templ->type == PIPE_SHADER_IR_NIR);
|
||||
nir_shader *s = templ->ir.nir;
|
||||
ifs->internal = s->info.internal;
|
||||
|
||||
tgsi_scan_shader(ifs->state.tokens, &ifs->info);
|
||||
bool debug = I915_DBG_ON(DBG_FS) &&
|
||||
(!s->info.internal || NIR_DEBUG(PRINT_INTERNAL));
|
||||
|
||||
char *msg = i915_check_control_flow(s);
|
||||
if (msg) {
|
||||
if (debug) {
|
||||
mesa_logi("failing shader:");
|
||||
nir_log_shaderi(s);
|
||||
}
|
||||
if (templ->report_compile_error) {
|
||||
((struct pipe_shader_state *)templ)->error_message = strdup(msg);
|
||||
ralloc_free(s);
|
||||
i915_delete_fs_state(NULL, ifs);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static enum i915_fs_mode fs_mode = -1;
|
||||
if (fs_mode == (enum i915_fs_mode)-1)
|
||||
fs_mode = i915_get_fs_mode();
|
||||
|
||||
bool try_nir = (fs_mode == I915_FS_NIR || fs_mode == I915_FS_BOTH);
|
||||
bool try_tgsi = (fs_mode == I915_FS_TGSI || fs_mode == I915_FS_BOTH);
|
||||
|
||||
struct i915_fragment_shader tgsi_fs = {0};
|
||||
|
||||
static const struct corm_compile_opts corm_variants[] = {
|
||||
{ .deferred_const = false, .seq_sne_opt = false },
|
||||
{ .deferred_const = false, .seq_sne_opt = true },
|
||||
{ .deferred_const = true, .seq_sne_opt = false },
|
||||
{ .deferred_const = true, .seq_sne_opt = true },
|
||||
{ .deferred_const = false, .seq_sne_opt = false, .late_scalar = true },
|
||||
{ .deferred_const = false, .seq_sne_opt = true, .late_scalar = true },
|
||||
{ .deferred_const = true, .seq_sne_opt = false, .late_scalar = true },
|
||||
{ .deferred_const = true, .seq_sne_opt = true, .late_scalar = true },
|
||||
};
|
||||
|
||||
struct i915_fragment_shader nir_results[ARRAY_SIZE(corm_variants)];
|
||||
int best_nir = -1;
|
||||
|
||||
if (try_nir) {
|
||||
nir_shader *nir_s = try_tgsi ? nir_shader_clone(NULL, s) : s;
|
||||
NIR_PASS(_, nir_s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
|
||||
type_size, (nir_lower_io_options)0);
|
||||
NIR_PASS(_, nir_s, nir_lower_alu_to_scalar, scalarize_vector_bools, NULL);
|
||||
NIR_PASS(_, nir_s, nir_opt_vectorize, NULL, NULL);
|
||||
NIR_PASS(_, nir_s, nir_lower_bool_to_float, false);
|
||||
NIR_PASS(_, nir_s, nir_shader_lower_instructions, lower_fsqrt_filter,
|
||||
lower_fsqrt_impl, NULL);
|
||||
NIR_PASS(_, nir_s, nir_opt_copy_prop);
|
||||
NIR_PASS(_, nir_s, nir_opt_cse);
|
||||
NIR_PASS(_, nir_s, nir_opt_dce);
|
||||
NIR_PASS(_, nir_s, nir_opt_algebraic);
|
||||
NIR_PASS(_, nir_s, nir_opt_algebraic_late);
|
||||
NIR_PASS(_, nir_s, nir_opt_dce);
|
||||
NIR_PASS(_, nir_s, nir_opt_shrink_vectors, false);
|
||||
NIR_PASS(_, nir_s, nir_opt_copy_prop);
|
||||
NIR_PASS(_, nir_s, nir_opt_dce);
|
||||
nir_index_ssa_defs(nir_shader_get_entrypoint(nir_s));
|
||||
|
||||
for (unsigned v = 0; v < ARRAY_SIZE(corm_variants); v++) {
|
||||
nir_shader *variant_nir = nir_shader_clone(NULL, nir_s);
|
||||
if (corm_variants[v].late_scalar) {
|
||||
NIR_PASS(_, variant_nir, nir_lower_alu_to_scalar, NULL, NULL);
|
||||
NIR_PASS(_, variant_nir, nir_opt_copy_prop);
|
||||
NIR_PASS(_, variant_nir, nir_opt_algebraic);
|
||||
NIR_PASS(_, variant_nir, nir_opt_dce);
|
||||
nir_index_ssa_defs(nir_shader_get_entrypoint(variant_nir));
|
||||
}
|
||||
memset(&nir_results[v], 0, sizeof(nir_results[v]));
|
||||
i915_populate_fs_metadata(&nir_results[v], variant_nir);
|
||||
i915_translate_fragment_program_nir(i915, &nir_results[v],
|
||||
variant_nir, &corm_variants[v]);
|
||||
ralloc_free(variant_nir);
|
||||
|
||||
bool ok = !nir_results[v].error || !nir_results[v].error[0];
|
||||
if (ok && (best_nir < 0 ||
|
||||
corm_fs_better(&nir_results[v], &nir_results[best_nir])))
|
||||
best_nir = v;
|
||||
}
|
||||
|
||||
ralloc_free(nir_s);
|
||||
}
|
||||
|
||||
if (try_tgsi) {
|
||||
i915_compile_tgsi(i915, &tgsi_fs, pipe->screen, s);
|
||||
} else {
|
||||
ralloc_free(s);
|
||||
}
|
||||
|
||||
bool nir_ok = best_nir >= 0;
|
||||
bool tgsi_ok = try_tgsi && (!tgsi_fs.error || !tgsi_fs.error[0]);
|
||||
struct i915_fragment_shader *best_nir_fs = nir_ok ? &nir_results[best_nir] : NULL;
|
||||
|
||||
bool use_nir;
|
||||
if (nir_ok && tgsi_ok)
|
||||
use_nir = !corm_fs_better(&tgsi_fs, best_nir_fs);
|
||||
else
|
||||
use_nir = nir_ok;
|
||||
|
||||
if (debug && try_nir && try_tgsi) {
|
||||
for (unsigned v = 0; v < ARRAY_SIZE(corm_variants); v++) {
|
||||
bool ok = !nir_results[v].error || !nir_results[v].error[0];
|
||||
mesa_logi(" NIR[dc=%d,ss=%d]: %s (%d ALU, %d phase, %d temps)%s",
|
||||
corm_variants[v].deferred_const,
|
||||
corm_variants[v].seq_sne_opt,
|
||||
ok ? "ok" : "FAIL",
|
||||
ok ? nir_results[v].nr_alu_insn : 0,
|
||||
ok ? nir_results[v].nr_tex_indirect : 0,
|
||||
ok ? nir_results[v].nr_temps : 0,
|
||||
(int)v == best_nir ? " *" : "");
|
||||
}
|
||||
mesa_logi(" TGSI: %s (%d ALU, %d phase, %d temps)",
|
||||
tgsi_ok ? "ok" : "FAIL",
|
||||
tgsi_ok ? tgsi_fs.nr_alu_insn : 0,
|
||||
tgsi_ok ? tgsi_fs.nr_tex_indirect : 0,
|
||||
tgsi_ok ? tgsi_fs.nr_temps : 0);
|
||||
mesa_logi(" -> %s%s", use_nir ? "NIR" : "TGSI",
|
||||
use_nir ? (corm_fs_better(best_nir_fs, &tgsi_fs)
|
||||
? " (better)" : " (tied)") : "");
|
||||
}
|
||||
|
||||
/* Free non-winning NIR variants */
|
||||
if (try_nir) {
|
||||
for (unsigned v = 0; v < ARRAY_SIZE(corm_variants); v++) {
|
||||
if ((int)v != best_nir) {
|
||||
FREE(nir_results[v].program);
|
||||
ralloc_free(nir_results[v].error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct i915_fragment_shader *winner, *loser = NULL;
|
||||
struct i915_fragment_shader nir_loser_copy = {0};
|
||||
if (use_nir) {
|
||||
winner = best_nir_fs;
|
||||
loser = tgsi_ok ? &tgsi_fs : NULL;
|
||||
} else {
|
||||
winner = &tgsi_fs;
|
||||
if (best_nir_fs) {
|
||||
nir_loser_copy = *best_nir_fs;
|
||||
nir_loser_copy.program = NULL;
|
||||
loser = &nir_loser_copy;
|
||||
FREE(best_nir_fs->program);
|
||||
ralloc_free(best_nir_fs->error);
|
||||
}
|
||||
}
|
||||
|
||||
if (i915 && !ifs->internal) {
|
||||
bool neither = (winner->nr_alu_insn + winner->nr_tex_insn) == 0;
|
||||
char reason[32];
|
||||
if (neither)
|
||||
snprintf(reason, sizeof(reason), "neither");
|
||||
else
|
||||
corm_win_reason(winner, loser, reason, sizeof(reason));
|
||||
util_debug_message(
|
||||
&i915->debug, SHADER_INFO,
|
||||
"%s shader [%s, %s]: %d instructions, %d alu, %d tex, "
|
||||
"%d tex_indirect, %d temps, %d const",
|
||||
_mesa_shader_stage_to_abbrev(MESA_SHADER_FRAGMENT),
|
||||
neither ? "FAIL" : use_nir ? "NIR" : "TGSI", reason,
|
||||
winner->nr_alu_insn + winner->nr_tex_insn,
|
||||
winner->nr_alu_insn, winner->nr_tex_insn, winner->nr_tex_indirect,
|
||||
winner->nr_temps, winner->num_constants);
|
||||
}
|
||||
|
||||
ifs->program = winner->program;
|
||||
ifs->program_len = winner->program_len;
|
||||
ifs->nr_alu_insn = winner->nr_alu_insn;
|
||||
ifs->nr_tex_insn = winner->nr_tex_insn;
|
||||
ifs->nr_tex_indirect = winner->nr_tex_indirect;
|
||||
ifs->nr_temps = winner->nr_temps;
|
||||
ifs->num_constants = winner->num_constants;
|
||||
memcpy(ifs->constants, winner->constants, sizeof(ifs->constants));
|
||||
memcpy(ifs->constant_flags, winner->constant_flags,
|
||||
sizeof(ifs->constant_flags));
|
||||
memcpy(ifs->texcoords, winner->texcoords, sizeof(ifs->texcoords));
|
||||
ifs->reads_pntc = winner->reads_pntc;
|
||||
ifs->writes_z = winner->writes_z;
|
||||
ifs->num_inputs = winner->num_inputs;
|
||||
memcpy(ifs->input_semantic_name, winner->input_semantic_name,
|
||||
sizeof(ifs->input_semantic_name));
|
||||
memcpy(ifs->input_semantic_index, winner->input_semantic_index,
|
||||
sizeof(ifs->input_semantic_index));
|
||||
if (winner->error)
|
||||
ifs->error = winner->error;
|
||||
|
||||
/* The loser's info may be in use (TGSI path populates ifs->info) */
|
||||
if (try_tgsi)
|
||||
ifs->info = tgsi_fs.info;
|
||||
|
||||
if (loser) {
|
||||
FREE(loser->program);
|
||||
ralloc_free(loser->error);
|
||||
}
|
||||
if (!use_nir && try_tgsi) {
|
||||
/* TGSI won — tokens are in tgsi_fs via i915_compile_tgsi.
|
||||
* We need them for ifs->state for draw's FS pipeline. */
|
||||
ifs->state = tgsi_fs.state;
|
||||
} else if (try_tgsi) {
|
||||
FREE((void *)tgsi_fs.state.tokens);
|
||||
}
|
||||
|
||||
/* The shader's compiled to i915 instructions here */
|
||||
i915_translate_fragment_program(i915, ifs);
|
||||
if (ifs->error && templ->report_compile_error) {
|
||||
((struct pipe_shader_state *)templ)->error_message = strdup(ifs->error);
|
||||
i915_delete_fs_state(NULL, ifs);
|
||||
|
|
@ -667,28 +971,11 @@ i915_create_vs_state(struct pipe_context *pipe,
|
|||
const struct pipe_shader_state *templ)
|
||||
{
|
||||
struct i915_context *i915 = i915_context(pipe);
|
||||
void *vertex_shader;
|
||||
|
||||
struct pipe_shader_state from_nir = {PIPE_SHADER_IR_TGSI};
|
||||
if (templ->type == PIPE_SHADER_IR_NIR) {
|
||||
nir_shader *s = templ->ir.nir;
|
||||
if (templ->type == PIPE_SHADER_IR_NIR)
|
||||
NIR_PASS(_, templ->ir.nir, nir_lower_point_size, 1.0, 255.0);
|
||||
|
||||
NIR_PASS(_, s, nir_lower_point_size, 1.0, 255.0);
|
||||
|
||||
/* The gallivm draw path doesn't support non-native-integers NIR shaders,
|
||||
* st/mesa does native-integers for the screen as a whole rather than
|
||||
* per-stage, and i915 FS can't do native integers. So, convert to TGSI,
|
||||
* where the draw path *does* support non-native-integers.
|
||||
*/
|
||||
from_nir.tokens = nir_to_tgsi(s, pipe->screen);
|
||||
templ = &from_nir;
|
||||
}
|
||||
|
||||
vertex_shader = draw_create_vertex_shader(i915->draw, templ);
|
||||
|
||||
FREE((void *)from_nir.tokens);
|
||||
|
||||
return vertex_shader;
|
||||
return draw_create_vertex_shader(i915->draw, templ);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -332,28 +332,33 @@ emit_constants(struct i915_context *i915)
|
|||
OUT_BATCH((1 << nr) - 1);
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
const uint32_t *c;
|
||||
if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) {
|
||||
/* grab user-defined constant */
|
||||
c = (uint32_t *)i915_buffer(i915->constants[MESA_SHADER_FRAGMENT])
|
||||
->data;
|
||||
uint8_t flags = i915->fs->constant_flags[i];
|
||||
uint8_t user_mask = flags >> 4;
|
||||
|
||||
if (!user_mask) {
|
||||
const uint32_t *c = (uint32_t *)i915->fs->constants[i];
|
||||
OUT_BATCH(c[0]);
|
||||
OUT_BATCH(c[1]);
|
||||
OUT_BATCH(c[2]);
|
||||
OUT_BATCH(c[3]);
|
||||
} else if (user_mask == 0xf) {
|
||||
const uint32_t *c =
|
||||
(uint32_t *)i915_buffer(i915->constants[MESA_SHADER_FRAGMENT])
|
||||
->data;
|
||||
c += 4 * i;
|
||||
OUT_BATCH(c[0]);
|
||||
OUT_BATCH(c[1]);
|
||||
OUT_BATCH(c[2]);
|
||||
OUT_BATCH(c[3]);
|
||||
} else {
|
||||
/* emit program constant */
|
||||
c = (uint32_t *)i915->fs->constants[i];
|
||||
const uint32_t *user =
|
||||
(uint32_t *)i915_buffer(i915->constants[MESA_SHADER_FRAGMENT])
|
||||
->data;
|
||||
user += 4 * i;
|
||||
const uint32_t *imm = (uint32_t *)i915->fs->constants[i];
|
||||
for (unsigned ch = 0; ch < 4; ch++)
|
||||
OUT_BATCH((user_mask & (1 << ch)) ? user[ch] : imm[ch]);
|
||||
}
|
||||
#if 0 /* debug */
|
||||
{
|
||||
float *f = (float *) c;
|
||||
printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3],
|
||||
(i915->fs->constant_flags[i] == I915_CONSTFLAG_USER
|
||||
? "user" : "immediate"));
|
||||
}
|
||||
#endif
|
||||
OUT_BATCH(*c++);
|
||||
OUT_BATCH(*c++);
|
||||
OUT_BATCH(*c++);
|
||||
OUT_BATCH(*c++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ files_i915 = files(
|
|||
'i915_flush.c',
|
||||
'i915_fpc_emit.c',
|
||||
'i915_fpc.h',
|
||||
'i915_fpc_nir.c',
|
||||
'i915_fpc_optimize.c',
|
||||
'i915_fpc_translate.c',
|
||||
'i915_prim_emit.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue