i915g: Do generic remapping.

With complex shaders there are often "holes" in the fs inputs, and we only
have 8 tex coorsd to map those to. To fix this, we remap fs inputs to [0..8].
This lets us to run many more GLSL programs.
This commit is contained in:
Stéphane Marchesin 2011-06-06 20:40:16 -07:00
parent 2292025c49
commit abb4365269
3 changed files with 119 additions and 92 deletions

View file

@ -124,6 +124,12 @@ struct i915_fragment_shader
* Else, the bitmask indicates which components are occupied by immediates.
*/
ubyte constant_flags[I915_MAX_CONSTANT];
/**
* The mapping between generics and hw texture coords.
* We need to share this between the vertex and fragment stages.
**/
int generic_mapping[I915_TEX_UNITS];
};

View file

@ -133,7 +133,21 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
p->error = 1;
}
static uint get_mapping(struct i915_fragment_shader* fs, int unit)
{
int i;
for (i = 0; i < I915_TEX_UNITS; i++)
{
if (fs->generic_mapping[i] == -1) {
fs->generic_mapping[i] = unit;
return i;
}
if (fs->generic_mapping[i] == unit)
return i;
}
debug_printf("Exceeded max generics\n");
return 0;
}
/**
* Construct a ureg for the given source register. Will emit
@ -141,7 +155,8 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
*/
static uint
src_vector(struct i915_fp_compile *p,
const struct tgsi_full_src_register *source)
const struct tgsi_full_src_register *source,
struct i915_fragment_shader* fs)
{
uint index = source->Register.Index;
uint src = 0, sem_name, sem_ind;
@ -192,15 +207,11 @@ src_vector(struct i915_fp_compile *p,
src = swizzle(src, W, W, W, W);
break;
case TGSI_SEMANTIC_GENERIC:
if (sem_ind < 8)
/* a texcoord */
src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind, D0_CHANNEL_ALL);
else if ( (sem_ind >= 10) && (sem_ind < 18) )
/* a varying */
src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind - 10, D0_CHANNEL_ALL);
else
debug_printf("%s: unhandled generic %d\n", __func__, sem_ind);
break;
{
int real_tex_unit = get_mapping(fs, sem_ind);
src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL);
break;
}
default:
i915_program_error(p, "Bad source->Index");
return 0;
@ -342,13 +353,14 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex)
static void
emit_tex(struct i915_fp_compile *p,
const struct tgsi_full_instruction *inst,
uint opcode)
uint opcode,
struct i915_fragment_shader* fs)
{
uint texture = inst->Texture.Texture;
uint unit = inst->Src[1].Register.Index;
uint tex = translate_tex_src_target( p, texture );
uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
uint coord = src_vector( p, &inst->Src[0]);
uint coord = src_vector( p, &inst->Src[0], fs);
i915_emit_texld( p,
get_result_vector( p, &inst->Dst[0] ),
@ -367,15 +379,16 @@ emit_tex(struct i915_fp_compile *p,
static void
emit_simple_arith(struct i915_fp_compile *p,
const struct tgsi_full_instruction *inst,
uint opcode, uint numArgs)
uint opcode, uint numArgs,
struct i915_fragment_shader* fs)
{
uint arg1, arg2, arg3;
assert(numArgs <= 3);
arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0] );
arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1] );
arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2] );
arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0], fs );
arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1], fs );
arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2], fs );
i915_emit_arith( p,
opcode,
@ -391,7 +404,8 @@ emit_simple_arith(struct i915_fp_compile *p,
static void
emit_simple_arith_swap2(struct i915_fp_compile *p,
const struct tgsi_full_instruction *inst,
uint opcode, uint numArgs)
uint opcode, uint numArgs,
struct i915_fragment_shader* fs)
{
struct tgsi_full_instruction inst2;
@ -402,7 +416,7 @@ emit_simple_arith_swap2(struct i915_fp_compile *p,
inst2.Src[0] = inst->Src[1];
inst2.Src[1] = inst->Src[0];
emit_simple_arith(p, &inst2, opcode, numArgs);
emit_simple_arith(p, &inst2, opcode, numArgs, fs);
}
@ -421,7 +435,8 @@ emit_simple_arith_swap2(struct i915_fp_compile *p,
*/
static void
i915_translate_instruction(struct i915_fp_compile *p,
const struct tgsi_full_instruction *inst)
const struct tgsi_full_instruction *inst,
struct i915_fragment_shader *fs)
{
uint writemask;
uint src0, src1, src2, flags;
@ -429,7 +444,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ABS:
src0 = src_vector(p, &inst->Src[0]);
src0 = src_vector(p, &inst->Src[0], fs);
i915_emit_arith(p,
A0_MAX,
get_result_vector(p, &inst->Dst[0]),
@ -438,13 +453,13 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_ADD:
emit_simple_arith(p, inst, A0_ADD, 2);
emit_simple_arith(p, inst, A0_ADD, 2, fs);
break;
case TGSI_OPCODE_CMP:
src0 = src_vector(p, &inst->Src[0]);
src1 = src_vector(p, &inst->Src[1]);
src2 = src_vector(p, &inst->Src[2]);
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
src2 = src_vector(p, &inst->Src[2], fs);
i915_emit_arith(p, A0_CMP,
get_result_vector(p, &inst->Dst[0]),
get_result_flags(inst),
@ -452,7 +467,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_COS:
src0 = src_vector(p, &inst->Src[0]);
src0 = src_vector(p, &inst->Src[0], fs);
tmp = i915_get_utemp(p);
i915_emit_arith(p,
@ -502,8 +517,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_DP2:
src0 = src_vector(p, &inst->Src[0]);
src1 = src_vector(p, &inst->Src[1]);
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
i915_emit_arith(p,
A0_DP3,
@ -513,16 +528,16 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_DP3:
emit_simple_arith(p, inst, A0_DP3, 2);
emit_simple_arith(p, inst, A0_DP3, 2, fs);
break;
case TGSI_OPCODE_DP4:
emit_simple_arith(p, inst, A0_DP4, 2);
emit_simple_arith(p, inst, A0_DP4, 2, fs);
break;
case TGSI_OPCODE_DPH:
src0 = src_vector(p, &inst->Src[0]);
src1 = src_vector(p, &inst->Src[1]);
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
i915_emit_arith(p,
A0_DP4,
@ -532,8 +547,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_DST:
src0 = src_vector(p, &inst->Src[0]);
src1 = src_vector(p, &inst->Src[1]);
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
/* result[0] = 1 * 1;
* result[1] = a[1] * b[1];
@ -553,7 +568,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_EX2:
src0 = src_vector(p, &inst->Src[0]);
src0 = src_vector(p, &inst->Src[0], fs);
i915_emit_arith(p,
A0_EXP,
@ -563,16 +578,16 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_FLR:
emit_simple_arith(p, inst, A0_FLR, 1);
emit_simple_arith(p, inst, A0_FLR, 1, fs);
break;
case TGSI_OPCODE_FRC:
emit_simple_arith(p, inst, A0_FRC, 1);
emit_simple_arith(p, inst, A0_FRC, 1, fs);
break;
case TGSI_OPCODE_KIL:
/* kill if src[0].x < 0 || src[0].y < 0 ... */
src0 = src_vector(p, &inst->Src[0]);
src0 = src_vector(p, &inst->Src[0], fs);
tmp = i915_get_utemp(p);
i915_emit_texld(p,
@ -588,7 +603,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_LG2:
src0 = src_vector(p, &inst->Src[0]);
src0 = src_vector(p, &inst->Src[0], fs);
i915_emit_arith(p,
A0_LOG,
@ -598,7 +613,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_LIT:
src0 = src_vector(p, &inst->Src[0]);
src0 = src_vector(p, &inst->Src[0], fs);
tmp = i915_get_utemp(p);
/* tmp = max( a.xyzw, a.00zw )
@ -631,9 +646,9 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_LRP:
src0 = src_vector(p, &inst->Src[0]);
src1 = src_vector(p, &inst->Src[1]);
src2 = src_vector(p, &inst->Src[2]);
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
src2 = src_vector(p, &inst->Src[2], fs);
flags = get_result_flags(inst);
tmp = i915_get_utemp(p);
@ -653,16 +668,16 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_MAD:
emit_simple_arith(p, inst, A0_MAD, 3);
emit_simple_arith(p, inst, A0_MAD, 3, fs);
break;
case TGSI_OPCODE_MAX:
emit_simple_arith(p, inst, A0_MAX, 2);
emit_simple_arith(p, inst, A0_MAX, 2, fs);
break;
case TGSI_OPCODE_MIN:
src0 = src_vector(p, &inst->Src[0]);
src1 = src_vector(p, &inst->Src[1]);
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
tmp = i915_get_utemp(p);
flags = get_result_flags(inst);
@ -679,16 +694,16 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_MOV:
emit_simple_arith(p, inst, A0_MOV, 1);
emit_simple_arith(p, inst, A0_MOV, 1, fs);
break;
case TGSI_OPCODE_MUL:
emit_simple_arith(p, inst, A0_MUL, 2);
emit_simple_arith(p, inst, A0_MUL, 2, fs);
break;
case TGSI_OPCODE_POW:
src0 = src_vector(p, &inst->Src[0]);
src1 = src_vector(p, &inst->Src[1]);
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
tmp = i915_get_utemp(p);
flags = get_result_flags(inst);
@ -712,7 +727,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_RCP:
src0 = src_vector(p, &inst->Src[0]);
src0 = src_vector(p, &inst->Src[0], fs);
i915_emit_arith(p,
A0_RCP,
@ -722,7 +737,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_RSQ:
src0 = src_vector(p, &inst->Src[0]);
src0 = src_vector(p, &inst->Src[0], fs);
i915_emit_arith(p,
A0_RSQ,
@ -732,7 +747,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_SCS:
src0 = src_vector(p, &inst->Src[0]);
src0 = src_vector(p, &inst->Src[0], fs);
tmp = i915_get_utemp(p);
/*
@ -797,8 +812,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
case TGSI_OPCODE_SEQ:
/* if we're both >= and <= then we're == */
src0 = src_vector(p, &inst->Src[0]);
src1 = src_vector(p, &inst->Src[1]);
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
tmp = i915_get_utemp(p);
i915_emit_arith(p,
@ -824,11 +839,11 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_SGE:
emit_simple_arith(p, inst, A0_SGE, 2);
emit_simple_arith(p, inst, A0_SGE, 2, fs);
break;
case TGSI_OPCODE_SIN:
src0 = src_vector(p, &inst->Src[0]);
src0 = src_vector(p, &inst->Src[0], fs);
tmp = i915_get_utemp(p);
i915_emit_arith(p,
@ -879,22 +894,22 @@ i915_translate_instruction(struct i915_fp_compile *p,
case TGSI_OPCODE_SLE:
/* like SGE, but swap reg0, reg1 */
emit_simple_arith_swap2(p, inst, A0_SGE, 2);
emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs);
break;
case TGSI_OPCODE_SLT:
emit_simple_arith(p, inst, A0_SLT, 2);
emit_simple_arith(p, inst, A0_SLT, 2, fs);
break;
case TGSI_OPCODE_SGT:
/* like SLT, but swap reg0, reg1 */
emit_simple_arith_swap2(p, inst, A0_SLT, 2);
emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);
break;
case TGSI_OPCODE_SNE:
/* if we're neither < nor > then we're != */
src0 = src_vector(p, &inst->Src[0]);
src1 = src_vector(p, &inst->Src[1]);
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
tmp = i915_get_utemp(p);
i915_emit_arith(p,
@ -921,7 +936,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
case TGSI_OPCODE_SSG:
/* compute (src>0) - (src<0) */
src0 = src_vector(p, &inst->Src[0]);
src0 = src_vector(p, &inst->Src[0], fs);
tmp = i915_get_utemp(p);
i915_emit_arith(p,
@ -947,8 +962,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_SUB:
src0 = src_vector(p, &inst->Src[0]);
src1 = src_vector(p, &inst->Src[1]);
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
i915_emit_arith(p,
A0_ADD,
@ -958,19 +973,19 @@ i915_translate_instruction(struct i915_fp_compile *p,
break;
case TGSI_OPCODE_TEX:
emit_tex(p, inst, T0_TEXLD);
emit_tex(p, inst, T0_TEXLD, fs);
break;
case TGSI_OPCODE_TRUNC:
emit_simple_arith(p, inst, A0_TRC, 1);
emit_simple_arith(p, inst, A0_TRC, 1, fs);
break;
case TGSI_OPCODE_TXB:
emit_tex(p, inst, T0_TEXLDB);
emit_tex(p, inst, T0_TEXLDB, fs);
break;
case TGSI_OPCODE_TXP:
emit_tex(p, inst, T0_TEXLDP);
emit_tex(p, inst, T0_TEXLDP, fs);
break;
case TGSI_OPCODE_XPD:
@ -980,8 +995,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
* result.z = src0.x * src1.y - src0.y * src1.x;
* result.w = undef;
*/
src0 = src_vector(p, &inst->Src[0]);
src1 = src_vector(p, &inst->Src[1]);
src0 = src_vector(p, &inst->Src[0], fs);
src1 = src_vector(p, &inst->Src[1], fs);
tmp = i915_get_utemp(p);
i915_emit_arith(p,
@ -1016,7 +1031,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
*/
static void
i915_translate_instructions(struct i915_fp_compile *p,
const struct tgsi_token *tokens)
const struct tgsi_token *tokens,
struct i915_fragment_shader *fs)
{
struct i915_fragment_shader *ifs = p->shader;
struct tgsi_parse_context parse;
@ -1097,7 +1113,7 @@ i915_translate_instructions(struct i915_fp_compile *p,
p->first_instruction = FALSE;
}
i915_translate_instruction(p, &parse.FullToken.FullInstruction);
i915_translate_instruction(p, &parse.FullToken.FullInstruction, fs);
break;
default:
@ -1115,6 +1131,7 @@ i915_init_compile(struct i915_context *i915,
struct i915_fragment_shader *ifs)
{
struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
int i;
p->shader = ifs;
@ -1127,6 +1144,9 @@ i915_init_compile(struct i915_context *i915,
ifs->num_constants = 0;
memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
for (i = 0; i < I915_TEX_UNITS; i++)
ifs->generic_mapping[i] = -1;
p->first_instruction = TRUE;
p->nr_tex_indirect = 1; /* correct? */
@ -1296,7 +1316,7 @@ i915_translate_fragment_program( struct i915_context *i915,
p = i915_init_compile(i915, fs);
i915_find_wpos_space(p);
i915_translate_instructions(p, tokens);
i915_translate_instructions(p, tokens, fs);
i915_fixup_depth_write(p);
i915_fini_compile(i915, p);

View file

@ -35,6 +35,18 @@
#include "i915_debug.h"
#include "i915_reg.h"
static uint find_mapping(struct i915_fragment_shader* fs, int unit)
{
int i;
for (i = 0; i < I915_TEX_UNITS ; i++)
{
if (fs->generic_mapping[i] == unit)
return i;
}
debug_printf("Mapping not found\n");
return 0;
}
/***********************************************************************
@ -46,12 +58,12 @@ static void calculate_vertex_layout(struct i915_context *i915)
const struct i915_fragment_shader *fs = i915->fs;
const enum interp_mode colorInterp = i915->rasterizer->color_interp;
struct vertex_info vinfo;
boolean texCoords[8], colors[2], fog, needW, have_varyings;
boolean texCoords[I915_TEX_UNITS], colors[2], fog, needW;
uint i;
int src;
memset(texCoords, 0, sizeof(texCoords));
colors[0] = colors[1] = fog = needW = have_varyings = FALSE;
colors[0] = colors[1] = fog = needW = FALSE;
memset(&vinfo, 0, sizeof(vinfo));
/* Determine which fragment program inputs are needed. Setup HW vertex
@ -67,19 +79,11 @@ static void calculate_vertex_layout(struct i915_context *i915)
break;
case TGSI_SEMANTIC_GENERIC:
{
/* texcoords/varyings */
/* texcoords/varyings/other generic */
/* XXX handle back/front face and point size */
uint unit = fs->info.input_semantic_index[i];
/* Route varyings as tex coords */
if ( (unit >= 10) && (unit < 18) ) {
have_varyings = TRUE;
unit -= 10;
}
assert(unit < 8);
texCoords[unit] = TRUE;
texCoords[find_mapping(fs, unit)] = TRUE;
needW = TRUE;
}
break;
@ -130,14 +134,11 @@ static void calculate_vertex_layout(struct i915_context *i915)
}
/* texcoords/varyings */
for (i = 0; i < 8; i++) {
for (i = 0; i < I915_TEX_UNITS; i++) {
uint hwtc;
if (texCoords[i]) {
hwtc = TEXCOORDFMT_4D;
if (!have_varyings)
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, i);
else
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, i + 10);
src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, fs->generic_mapping[i]);
draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
}
else {