Implement enough of ARB_fragment_program for Keith's texenv program

generation in Mesa.  Requires most recent Mesa cvs to work correctly.

Swizzle code is possibly slow/buggy and will probably be replaced.

Old behaviour can be controlled by a #define in r300_context.h, if
no-one minds I'll remove the old code later on.

Also, disabled the cube map extension.  AFAIK we don't support it yet,
and it causes issues with UT2004.
This commit is contained in:
Ben Skeggs 2005-05-23 14:14:19 +00:00
parent 0c4731b27c
commit eeada48497
9 changed files with 1312 additions and 53 deletions

View file

@ -37,6 +37,7 @@ DRIVER_SOURCES = \
r300_texstate.c \
r300_texprog.c \
r300_vertexprog.c \
r300_fragprog.c \
r300_shader.c \
r300_maos.c
# \

View file

@ -73,14 +73,17 @@ static const char *const card_extensions[] = {
"GL_ARB_multitexture",
"GL_ARB_texture_border_clamp",
"GL_ARB_texture_compression",
"GL_ARB_texture_cube_map",
/* disable until we support it, fixes a few things in ut2004 */
// "GL_ARB_texture_cube_map",
"GL_ARB_texture_env_add",
"GL_ARB_texture_env_combine",
"GL_ARB_texture_env_dot3",
"GL_ARB_texture_mirrored_repeat",
"GL_ARB_vertex_buffer_object",
"GL_ARB_vertex_program",
//"GL_ARB_fragment_program",
#if USE_ARB_F_P == 1
"GL_ARB_fragment_program",
#endif
"GL_EXT_blend_equation_separate",
"GL_EXT_blend_func_separate",
"GL_EXT_blend_minmax",
@ -101,6 +104,7 @@ static const char *const card_extensions[] = {
"GL_NV_blend_square",
"GL_NV_vertex_program",
"GL_SGIS_generate_mipmap",
"GL_ARB_texture_env_crossbar",
NULL
};
@ -325,7 +329,20 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
ctx->Const.MaxVertexProgramLocalParams=256; // r420
ctx->Const.MaxVertexProgramEnvParams=256; // r420
ctx->Const.MaxVertexProgramAddressRegs=1;
#if USE_ARB_F_P
ctx->Const.MaxFragmentProgramTemps = PFS_NUM_TEMP_REGS;
ctx->Const.MaxFragmentProgramAttribs = 11; /* copy i915... */
ctx->Const.MaxFragmentProgramLocalParams = PFS_NUM_CONST_REGS;
ctx->Const.MaxFragmentProgramEnvParams = PFS_NUM_CONST_REGS;
ctx->Const.MaxFragmentProgramAluInstructions = PFS_MAX_ALU_INST;
ctx->Const.MaxFragmentProgramTexInstructions = PFS_MAX_TEX_INST;
ctx->Const.MaxFragmentProgramInstructions = PFS_MAX_ALU_INST+PFS_MAX_TEX_INST;
ctx->Const.MaxFragmentProgramTexIndirections = PFS_MAX_TEX_INDIRECT;
ctx->Const.MaxFragmentProgramAddressRegs = 0; /* and these are?? */
ctx->_MaintainTexEnvProgram = GL_TRUE;
#endif
driInitExtensions(ctx, card_extensions, GL_TRUE);
radeonInitSpanFuncs(ctx);

View file

@ -47,6 +47,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "colormac.h"
#include "radeon_context.h"
#define USE_ARB_F_P 1
struct r300_context;
typedef struct r300_context r300ContextRec;
typedef struct r300_context *r300ContextPtr;
@ -83,7 +85,11 @@ typedef GLubyte uint8_t;
and pixel_shader structure later on */
#define CARD32 GLuint
#include "vertex_shader.h"
#if USE_ARB_F_P == 1
#include "r300_fragprog.h"
#else
#include "pixel_shader.h"
#endif
#undef CARD32
static __inline__ uint32_t r300PackFloat32(float fl)
@ -588,6 +594,67 @@ struct r300_vertex_program {
int tex_regs[8];
};
#if USE_ARB_F_P == 1
#define PFS_MAX_ALU_INST 64
#define PFS_MAX_TEX_INST 64
#define PFS_MAX_TEX_INDIRECT 4
#define PFS_NUM_TEMP_REGS 32
#define PFS_NUM_CONST_REGS 32
struct r300_fragment_program {
struct fragment_program mesa_program;
GLcontext *ctx;
GLboolean translated;
GLboolean error;
struct {
int length;
GLuint inst[PFS_MAX_TEX_INST];
} tex;
struct {
struct {
GLuint inst0;
GLuint inst1;
GLuint inst2;
GLuint inst3;
} inst[PFS_MAX_ALU_INST];
} alu;
int v_pos;
int s_pos;
struct {
int tex_offset;
int tex_end;
int alu_offset;
int alu_end;
} node[4];
int cur_node;
int first_node_has_tex;
int alu_offset;
int alu_end;
int tex_offset;
int tex_end;
struct {
float x;
float y;
float z;
float w;
} param[32];
int param_length;
GLuint temps[PFS_NUM_TEMP_REGS];
int temp_in_use;
GLuint used_in_node;
GLuint dest_in_node;
GLuint inputs[32]; /* don't actually need 32... */
int hwreg_in_use;
int max_temp_idx;
};
#else
/* 64 appears to be the maximum */
#define PSF_MAX_PROGRAM_LENGTH 64
@ -652,6 +719,7 @@ struct r300_pixel_shader_state {
float w;
} param[MAX_PIXEL_SHADER_PARAMS];
};
#endif // USE_ARB_F_P
/* 8 is somewhat bogus... it is probably something like 24 */
#define R300_MAX_AOS_ARRAYS 8
@ -682,8 +750,9 @@ struct r300_state {
struct r300_texture_state texture;
struct r300_vap_reg_state vap_reg;
struct r300_vertex_shader_state vertex_shader;
#if USE_ARB_F_P == 0
struct r300_pixel_shader_state pixel_shader;
#endif
struct r300_dma_region aos[R300_MAX_AOS_ARRAYS];
int aos_count;

View file

@ -0,0 +1,937 @@
/*
* Copyright (C) 2005 Ben Skeggs.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/*
* Authors:
* Ben Skeggs <darktama@iinet.net.au>
*/
/*TODO'S
*
* - Implement remaining arb_f_p opcodes
* - Depth write
* - Negate on individual components (implement in swizzle code?)
* - Reuse input/temp regs, if they're no longer needed.
* - Find out whether there's any benefit in ordering registers the way
* fglrx does (see r300_reg.h).
* - and more...
*/
#include "glheader.h"
#include "macros.h"
#include "enums.h"
#include "program.h"
#include "nvfragprog.h"
#include "r300_context.h"
#if USE_ARB_F_P == 1
#include "r300_fragprog.h"
#include "r300_reg.h"
#define PFS_INVAL 0xFFFFFFFF
static void dump_program(struct r300_fragment_program *rp);
static void emit_arith(struct r300_fragment_program *rp, int op,
pfs_reg_t dest, int mask,
pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2,
int flags);
/***************************************
* begin: useful data structures for fragment program generation
***************************************/
/* Description of r300 native hw instructions.
 *
 * emit_arith() indexes this table with its "op" argument, so the row
 * order has to match the PFS_OP_* numbering (MAD = 0 ... RSQ = 10).
 * A PFS_INVAL entry marks a stream (vector or scalar) for which no
 * opcode value is provided.
 */
const struct {
/* mnemonic */
const char *name;
/* number of source operands consumed */
int argc;
/* opcode bits OR'd into inst0 (vector stream) */
int v_op;
/* opcode bits OR'd into inst2 (scalar stream) */
int s_op;
} r300_fpop[] = {
{ "MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD },
{ "DP3", 2, R300_FPI0_OUTC_DP3, PFS_INVAL },
{ "DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4 },
{ "MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN },
{ "MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX },
{ "CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP },
{ "FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC },
/* TODO: should the vector opcode of the scalar-only insns below be
 * REPL_ALPHA instead of PFS_INVAL? */
{ "EX2", 1, PFS_INVAL, R300_FPI2_OUTA_EX2 },
{ "LG2", 1, PFS_INVAL, R300_FPI2_OUTA_LG2 },
{ "RCP", 1, PFS_INVAL, R300_FPI2_OUTA_RCP },
{ "RSQ", 1, PFS_INVAL, R300_FPI2_OUTA_RSQ },
};
/* Build a Mesa swizzle word from three component names; the fourth
 * component is filled with SWIZZLE_ZERO. */
#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \
SWIZZLE_##y, \
SWIZZLE_##z, \
SWIZZLE_ZERO))
/* vector swizzles r300 can support natively, with a couple of
* cases we handle specially
*
* pfs_reg_t.v_swz is an index into this table (pfs_reg_t.s_swz indexes
* s_swiz[] instead).  swizzle() walks the table from the top and stops
* at the first entry whose hash is PFS_INVAL, so entries from that point
* on are only reachable through an explicit index.
**/
static const struct r300_pfv_swizzle {
const char *name;
GLuint hash; /* swizzle value this matches */
GLboolean native;
GLuint base; /* base value for hw swizzle */
GLuint stride; /* difference in base between arg0/1/2 */
/* not read by the code in this file */
GLboolean dep_sca;
} v_swiz[] = {
/* native swizzles */
{ "xyz", MAKE_SWZ3(X, Y, Z), GL_TRUE, R300_FPI0_ARGC_SRC0C_XYZ, 4, GL_FALSE },
{ "xxx", MAKE_SWZ3(X, X, X), GL_TRUE, R300_FPI0_ARGC_SRC0C_XXX, 4, GL_FALSE },
{ "yyy", MAKE_SWZ3(Y, Y, Y), GL_TRUE, R300_FPI0_ARGC_SRC0C_YYY, 4, GL_FALSE },
{ "zzz", MAKE_SWZ3(Z, Z, Z), GL_TRUE, R300_FPI0_ARGC_SRC0C_ZZZ, 4, GL_FALSE },
{ "yzx", MAKE_SWZ3(Y, Z, X), GL_TRUE, R300_FPI0_ARGC_SRC0C_YZX, 1, GL_FALSE },
{ "zxy", MAKE_SWZ3(Z, X, Y), GL_TRUE, R300_FPI0_ARGC_SRC0C_ZXY, 1, GL_FALSE },
{ "wzy", MAKE_SWZ3(W, Z, Y), GL_TRUE, R300_FPI0_ARGC_SRC0CA_WZY, 1, GL_TRUE },
/* special cases */
{ NULL, MAKE_SWZ3(W, W, W), GL_FALSE, 0, 0, GL_FALSE},
{ NULL, MAKE_SWZ3(ONE, ONE, ONE), GL_FALSE, R300_FPI0_ARGC_ONE, 0, GL_FALSE},
{ NULL, MAKE_SWZ3(ZERO, ZERO, ZERO), GL_FALSE, R300_FPI0_ARGC_ZERO, 0, GL_FALSE},
/* constant 0.5; its PFS_INVAL hash is also what ends the table scan */
{ NULL, PFS_INVAL, GL_FALSE, R300_FPI0_ARGC_HALF, 0, GL_FALSE},
{ NULL, PFS_INVAL, GL_FALSE, 0, 0, 0 },
};
/* Indices of selected v_swiz[] rows; keep in sync with the table order. */
#define SWIZZLE_XYZ 0
#define SWIZZLE_XXX 1
#define SWIZZLE_WZY 6
#define SWIZZLE_111 8
#define SWIZZLE_000 9
#define SWIZZLE_HHH 10
/* Bit positions of the x/y/z/w selectors inside a swizzle word
 * (3 bits per component). */
#define SWZ_X_MASK (7 << 0)
#define SWZ_Y_MASK (7 << 3)
#define SWZ_Z_MASK (7 << 6)
#define SWZ_W_MASK (7 << 9)
/* Used during matching of non-native swizzles.
 *
 * swizzle() tries these component subsets in order: all three vector
 * components first, then pairs, then single components.  The final
 * PFS_INVAL row terminates the walk.
 */
static const struct {
GLuint hash; /* used to mask matching swizzle components */
int mask; /* actual outmask */
int count; /* count of components matched */
} s_mask[] = {
{ SWZ_X_MASK|SWZ_Y_MASK|SWZ_Z_MASK, 1|2|4, 3},
{ SWZ_X_MASK|SWZ_Y_MASK, 1|2, 2},
{ SWZ_X_MASK|SWZ_Z_MASK, 1|4, 2},
{ SWZ_Y_MASK|SWZ_Z_MASK, 2|4, 2},
{ SWZ_X_MASK, 1, 1},
{ SWZ_Y_MASK, 2, 1},
{ SWZ_Z_MASK, 4, 1},
{ PFS_INVAL, PFS_INVAL, PFS_INVAL}
};
/* Mapping from SWIZZLE_* to r300 native values for scalar insns.
 *
 * Indexed by pfs_reg_t.s_swz; emit_arith() computes the hw selector for
 * argument i as base + i * stride.
 */
static const struct {
const char *name;
int base; /* hw value of swizzle */
int stride; /* difference between SRC0/1/2 */
/* not read by the code in this file */
GLboolean dep_vec;
} s_swiz[] = {
{ "x", R300_FPI2_ARGA_SRC0C_X, 3, GL_TRUE },
{ "y", R300_FPI2_ARGA_SRC0C_Y, 3, GL_TRUE },
{ "z", R300_FPI2_ARGA_SRC0C_Z, 3, GL_TRUE },
{ "w", R300_FPI2_ARGA_SRC0A , 1, GL_FALSE },
{ "0", R300_FPI2_ARGA_ZERO , 0, GL_FALSE },
{ "1", R300_FPI2_ARGA_ONE , 0, GL_FALSE },
{ ".5", R300_FPI2_ARGA_HALF, 0, GL_FALSE }
};
/* index of the ".5" row above */
#define SWIZZLE_HALF 6
/* Boiler-plate reg, for convenience: temp 0 with the identity swizzle
 * (xyz / w), no negation, and marked invalid.  Callers copy it and then
 * fill in the fields they need, setting valid once the register is
 * usable. */
const pfs_reg_t pfs_default_reg = {
type: REG_TYPE_TEMP,
index: 0,
v_swz: 0 /* matches XYZ in table */,
s_swz: SWIZZLE_W,
vcross: 0,
scross: 0,
negate: 0,
has_w: GL_FALSE,
valid: GL_FALSE
};
/* constant one source (selects the hw ONE swizzle for both streams) */
const pfs_reg_t pfs_one = {
type: REG_TYPE_CONST,
index: 0,
v_swz: SWIZZLE_111,
s_swz: SWIZZLE_ONE,
valid: GL_TRUE
};
/* constant zero source (selects the hw ZERO swizzle for both streams) */
const pfs_reg_t pfs_zero = {
type: REG_TYPE_CONST,
index: 0,
v_swz: SWIZZLE_000,
s_swz: SWIZZLE_ZERO,
valid: GL_TRUE
};
/***************************************
* end: data structures
***************************************/
/* Report a translation error on stderr (prefixed with file and function
 * name, and followed by a newline) and flag the program as failed.
 * Requires a variable named "rp" pointing at the current
 * struct r300_fragment_program to be in scope at the call site. */
#define ERROR(fmt, args...) do { \
fprintf(stderr, "%s::%s(): " fmt "\n", __FILE__, __func__, ##args); \
rp->error = GL_TRUE; \
} while(0)
/* Claim the lowest-numbered hardware register that is still free.
 * Returns its index, or 0 (after flagging an error) if none is left. */
static int get_hw_temp(struct r300_fragment_program *rp)
{
	int slot = ffs(~rp->hwreg_in_use);

	if (slot == 0) {
		ERROR("Out of hardware temps\n");
		return 0;
	}

	/* ffs() counts bits from 1 */
	slot -= 1;
	rp->hwreg_in_use |= (1 << slot);

	/* remember the highest register ever handed out */
	if (rp->max_temp_idx < slot)
		rp->max_temp_idx = slot;

	return slot;
}
/* Mark hardware register idx as available again. */
static void free_hw_temp(struct r300_fragment_program *rp, int idx)
{
	const int bit = 1 << idx;

	rp->hwreg_in_use &= ~bit;
}
/* Allocate a scratch program temp and back it with a hardware register.
 * On exhaustion an error is flagged and an invalid register is returned. */
static pfs_reg_t get_temp_reg(struct r300_fragment_program *rp)
{
	pfs_reg_t reg = pfs_default_reg;
	int slot = ffs(~rp->temp_in_use);

	if (slot == 0) {
		reg.index = 0;
		ERROR("Out of program temps\n");
		return reg;
	}

	/* ffs() counts bits from 1 */
	slot -= 1;
	reg.index = slot;
	rp->temp_in_use |= (1 << slot);
	rp->temps[slot] = get_hw_temp(rp);
	reg.valid = GL_TRUE;

	return reg;
}
/* Release a program temp and the hardware register behind it.
 * Does nothing if rp is NULL or the temp is not currently allocated. */
static void free_temp(struct r300_fragment_program *rp, pfs_reg_t r)
{
	int bit;

	if (rp == NULL)
		return;

	bit = 1 << r.index;
	if ((rp->temp_in_use & bit) == 0)
		return;

	free_hw_temp(rp, rp->temps[r.index]);
	rp->temp_in_use &= ~bit;
}
/* Make the four floats at cp available as a constant source register.
 *
 * Vectors that are all 1.0, all 0.0 or all 0.5 are mapped onto the
 * hardware's built-in constant swizzles and consume no constant slot.
 * Anything else is appended to rp->param[].  If the table is full an
 * error is flagged and an invalid register is returned instead of
 * writing past the end of the array.
 */
static pfs_reg_t emit_const4fv(struct r300_fragment_program *rp, GLfloat *cp)
{
	pfs_reg_t r = pfs_default_reg;

	r.type = REG_TYPE_CONST;
	r.valid = GL_TRUE;

	if (cp[0] == 1.0 && cp[1] == 1.0 && cp[2] == 1.0 && cp[3] == 1.0) {
		r.v_swz = SWIZZLE_111;
		r.s_swz = SWIZZLE_ONE;
		r.index = 0;
	} else if (cp[0] == 0.0 && cp[1] == 0.0 && cp[2] == 0.0 && cp[3] == 0.0) {
		r.v_swz = SWIZZLE_000;
		r.s_swz = SWIZZLE_ZERO;
		r.index = 0;
	} else if (cp[0] == 0.5 && cp[1] == 0.5 && cp[2] == 0.5 && cp[3] == 0.5) {
		r.v_swz = SWIZZLE_HHH;
		r.s_swz = SWIZZLE_HALF;
		r.index = 0;
	} else {
		/* derive the capacity from the array itself so the check can
		 * never drift from the declaration */
		const int max_params =
			(int)(sizeof(rp->param) / sizeof(rp->param[0]));

		if (rp->param_length < 0 || rp->param_length >= max_params) {
			ERROR("Out of constant slots (max %d)", max_params);
			r.index = 0;
			r.valid = GL_FALSE;
			return r;
		}

		r.index = rp->param_length++;
		rp->param[r.index].x = cp[0];
		rp->param[r.index].y = cp[1];
		rp->param[r.index].z = cp[2];
		rp->param[r.index].w = cp[3];
	}

	return r;
}
/* Return r with its sign flipped.
 *
 * The flag is toggled rather than forced on: t_src() may already have
 * set it for a source that the program negates itself, and negating such
 * an operand again (e.g. the second operand of SUB) must yield the
 * positive value.
 */
static pfs_reg_t negate(pfs_reg_t r)
{
	r.negate = !r.negate;
	return r;
}
/* The requested vector swizzle is directly supported: hand the source
 * back unchanged, flagged as already carrying a usable w component.
 * Always reports all three vector components as matched. */
static int swz_native(struct r300_fragment_program *rp,
		      pfs_reg_t src, pfs_reg_t *r)
{
	(void) rp;

	src.has_w = GL_TRUE;
	*r = src;

	return 3;
}
/* Copy the components selected by s_mask[mask] from src (using src's
 * current native swizzle) into the result register, allocating that
 * register on first use.  Returns how many components were covered. */
static int swz_emit_partial(struct r300_fragment_program *rp,
			    pfs_reg_t src, pfs_reg_t *r, int mask)
{
	const int writemask = s_mask[mask].mask;

	if (!r->valid) {
		*r = get_temp_reg(rp);
	}

	/* dst.mask = src * 1 + 0 */
	emit_arith(rp, PFS_OP_MAD, *r, writemask, src, pfs_one, pfs_zero, 0);

	return s_mask[mask].count;
}
/* Handle a match against one of the non-native v_swiz[] rows.
 *
 * Only the "www" row is implemented: w is first copied into the x
 * component of a scratch temp via the native WZY swizzle, and then read
 * back as XXX.  The ONE/ZERO rows fall through to the error path.
 *
 * Returns the number of vector components covered (s_mask[mask].count),
 * or 0 after flagging an error.
 */
static int swz_special_case(struct r300_fragment_program *rp,
pfs_reg_t src, pfs_reg_t *r, int mask)
{
pfs_reg_t ssrc = pfs_default_reg;
/* the first component of the matched row identifies the special case */
switch(GET_SWZ(v_swiz[src.v_swz].hash, 0)) {
case SWIZZLE_W:
ssrc = get_temp_reg(rp);
src.v_swz = SWIZZLE_WZY;
src.vcross = GL_TRUE;
if (s_mask[mask].count == 3) {
/* whole vector wanted: the scratch temp itself becomes the result,
 * read as xxx, so it is not freed here */
emit_arith(rp, PFS_OP_MAD, ssrc, WRITEMASK_XW, src, pfs_one, pfs_zero, 0);
*r = ssrc;
r->v_swz = SWIZZLE_XXX;
r->s_swz = SWIZZLE_W;
r->has_w = GL_TRUE;
} else {
/* partial match: broadcast through the scratch temp into the
 * selected components of the result, then release the scratch */
if (!r->valid)
*r = get_temp_reg(rp);
emit_arith(rp, PFS_OP_MAD, ssrc, WRITEMASK_X, src, pfs_one, pfs_zero, 0);
ssrc.v_swz = SWIZZLE_XXX;
emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask, ssrc, pfs_one, pfs_zero, 0);
free_temp(rp, ssrc);
}
break;
/* not implemented yet: reported as an error below */
case SWIZZLE_ONE:
case SWIZZLE_ZERO:
default:
ERROR("Unknown special-case swizzle! %d\n", src.v_swz);
return 0;
}
return s_mask[mask].count;
}
/* Produce a register that reads src through the ARB swizzle arbswz.
 *
 * The scalar (w) selector is taken directly from arbswz.  For the vector
 * part, progressively smaller component subsets (s_mask[]) are matched
 * against the swizzles the hardware offers (v_swiz[]).  A full native
 * match costs nothing; otherwise the result is assembled in a temp with
 * one MAD per matched subset.
 *
 * Returns the swizzled register, or an invalid register after flagging
 * an error if no combination covers all three components.
 */
static pfs_reg_t swizzle(struct r300_fragment_program *rp,
			 pfs_reg_t src,
			 GLuint arbswz)
{
	pfs_reg_t r = pfs_default_reg;
	int c_mask = 0;
	int v_matched = 0;

	src.v_swz = SWIZZLE_XYZ;
	src.s_swz = GET_SWZ(arbswz, 3);
	if (src.s_swz >= SWIZZLE_X && src.s_swz <= SWIZZLE_Z)
		src.scross = GL_TRUE;

	do {
		do {
#define CUR_HASH (v_swiz[src.v_swz].hash & s_mask[c_mask].hash)
			if (CUR_HASH == (arbswz & s_mask[c_mask].hash)) {
				if (v_swiz[src.v_swz].native == GL_FALSE)
					v_matched += swz_special_case(rp, src, &r, c_mask);
				else if (s_mask[c_mask].count == 3)
					v_matched += swz_native(rp, src, &r);
				else
					v_matched += swz_emit_partial(rp, src, &r, c_mask);

				if (v_matched == 3) {
					if (!r.has_w) {
						emit_arith(rp, PFS_OP_MAD, r, WRITEMASK_W, src, pfs_one, pfs_zero, 0);
						r.s_swz = SWIZZLE_W;
					}
					if (r.type != REG_TYPE_CONST) {
						if (r.v_swz == SWIZZLE_WZY)
							r.vcross = GL_TRUE;
						if (r.s_swz >= SWIZZLE_X && r.s_swz <= SWIZZLE_Z)
							r.scross = GL_TRUE;
					}
					return r;
				}

				/* Invalidate the components just handled.  They are
				 * set to all ones, a selector no table row uses;
				 * clearing them to zero would make them look like
				 * SWIZZLE_X and let them be matched and counted a
				 * second time. */
				arbswz |= s_mask[c_mask].hash;
			}
		} while (v_swiz[++src.v_swz].hash != PFS_INVAL);

		/* Restart from the top of v_swiz[] for the next subset.
		 * Without this the next pass would begin at the terminator
		 * and walk off the end of the table. */
		src.v_swz = SWIZZLE_XYZ;
	} while (s_mask[++c_mask].hash != PFS_INVAL);

	ERROR("should NEVER get here\n");
	return r;
}
/* Translate a Mesa source operand into a pfs_reg_t.
 *
 * Temporaries and inputs keep their Mesa index; parameters of any kind
 * are turned into constants through emit_const4fv().  The operand's
 * swizzle and (whole-register) negation are applied afterwards.  An
 * unknown register file flags an error and yields an invalid register.
 */
static pfs_reg_t t_src(struct r300_fragment_program *rp,
		       struct fp_src_register fpsrc)
{
	pfs_reg_t reg = pfs_default_reg;

	switch (fpsrc.File) {
	case PROGRAM_INPUT:
		reg.type = REG_TYPE_INPUT;
		/* fallthrough: index/valid are set the same way as for temps */
	case PROGRAM_TEMPORARY:
		reg.index = fpsrc.Index;
		reg.valid = GL_TRUE;
		break;
	case PROGRAM_ENV_PARAM:
		reg = emit_const4fv(rp, rp->ctx->FragmentProgram.Parameters[fpsrc.Index]);
		break;
	case PROGRAM_LOCAL_PARAM:
		reg = emit_const4fv(rp, rp->mesa_program.Base.LocalParams[fpsrc.Index]);
		break;
	case PROGRAM_NAMED_PARAM:
	case PROGRAM_STATE_VAR:
		reg = emit_const4fv(rp, rp->mesa_program.Parameters->ParameterValues[fpsrc.Index]);
		break;
	default:
		ERROR("unknown SrcReg->File %x\n", fpsrc.File);
		return reg;
	}

	/* the built-in ONE/ZERO/HALF constants read the same through any
	 * swizzle, so only everything else goes through swizzle() */
	if (!(reg.v_swz >= SWIZZLE_111 || reg.s_swz >= SWIZZLE_ZERO)) {
		reg = swizzle(rp, reg, fpsrc.Swizzle);
	}

	/* WRONG! Per-component negation is not supported yet; only the
	 * all-components case can be done natively like this.  It should
	 * probably be folded into the swizzling code. */
	if (fpsrc.NegateBase) {
		reg.negate = GL_TRUE;
	}

	return reg;
}
static pfs_reg_t t_dst(struct r300_fragment_program *rp,
struct fp_dst_register dest) {
pfs_reg_t r = pfs_default_reg;
switch (dest.File) {
case PROGRAM_TEMPORARY:
r.index = dest.Index;
r.valid = GL_TRUE;
return r;
case PROGRAM_OUTPUT:
r.type = REG_TYPE_OUTPUT;
switch (dest.Index) {
case 0:
r.valid = GL_TRUE;
return r;
case 1:
ERROR("I don't know how to write depth!\n");
return r;
default:
ERROR("Bad DstReg->Index 0x%x\n", dest.Index);
return r;
}
default:
ERROR("Bad DstReg->File 0x%x\n", dest.File);
return r;
}
}
/* Bring the vector and scalar instruction streams back to the same
 * position by padding whichever one is behind with NOPs.
 *
 * "MAD t0, t0, 1.0, 0.0" is used as the NOP.
 */
static void sync_streams(struct r300_fragment_program *rp)
{
	/* vector stream lagging: pad inst0/inst1 */
	for (; rp->v_pos < rp->s_pos; rp->v_pos++) {
		rp->alu.inst[rp->v_pos].inst0 = 0x00050A80;
		rp->alu.inst[rp->v_pos].inst1 = 0x03820800;
	}

	/* scalar stream lagging: pad inst2/inst3 */
	for (; rp->s_pos < rp->v_pos; rp->s_pos++) {
		rp->alu.inst[rp->s_pos].inst2 = 0x00040889;
		rp->alu.inst[rp->s_pos].inst3 = 0x00820800;
	}
}
static void emit_tex(struct r300_fragment_program *rp,
struct fp_instruction *fpi,
int opcode)
{
pfs_reg_t coord = t_src(rp, fpi->SrcReg[0]);
pfs_reg_t dest = t_dst(rp, fpi->DstReg);
int unit = fpi->TexSrcUnit;
int hwsrc, hwdest, flags = 0;
switch (coord.type) {
case REG_TYPE_TEMP:
hwsrc = rp->temps[coord.index];
break;
case REG_TYPE_INPUT:
hwsrc = rp->inputs[coord.index];
break;
case REG_TYPE_CONST:
hwsrc = coord.index;
flags = R300_FPITX_SRC_CONST;
break;
default:
ERROR("Unknown coord.type = %d\n", coord.type);
return;
}
hwdest = rp->temps[dest.index];
/* Indirection if source has been written in this node, or if the dest has
* been read/written in this node
*/
if ((coord.type != REG_TYPE_CONST && (rp->dest_in_node & (1<<hwsrc))) ||
(rp->used_in_node & (1<<hwdest))) {
if (rp->cur_node == 3) { /* We only support 4 natively */
ERROR("too many levels of texture indirection\n");
return;
}
/* Finish off current node */
sync_streams(rp);
rp->node[rp->cur_node].alu_end = rp->v_pos - 1;
/* Start new node */
rp->cur_node++;
rp->used_in_node = 0;
rp->dest_in_node = 0;
rp->node[rp->cur_node].tex_offset = rp->tex.length;
rp->node[rp->cur_node].alu_offset = rp->v_pos;
rp->node[rp->cur_node].tex_end = -1;
rp->node[rp->cur_node].alu_end = -1;
}
if (rp->cur_node == 0) rp->first_node_has_tex = 1;
rp->tex.inst[rp->tex.length++] = 0
| (hwsrc << R300_FPITX_SRC_SHIFT)
| (hwdest << R300_FPITX_DST_SHIFT)
| (unit << R300_FPITX_IMAGE_SHIFT)
| (opcode << R300_FPITX_OPCODE_SHIFT) /* not entirely sure about this */
| flags;
rp->dest_in_node |= (1 << hwdest);
rp->node[rp->cur_node].tex_end++;
}
/* Emit one ALU instruction.
 *
 * op     - PFS_OP_* index into r300_fpop[]
 * dest   - destination register (temp or output)
 * mask   - WRITEMASK_* bits; xyz bits produce a vector-stream
 *          instruction, the w bit a scalar-stream instruction
 * src0-2 - sources; only the first r300_fpop[op].argc are read, the
 *          rest are encoded as constant zero (all three must still be
 *          valid registers)
 * flags  - extra bits OR'd into both opcode words (e.g. saturate)
 *
 * Flags an error and emits nothing if a register is invalid, the opcode
 * is out of range, or the ALU instruction table is full.
 */
static void emit_arith(struct r300_fragment_program *rp, int op,
		       pfs_reg_t dest, int mask,
		       pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2,
		       int flags)
{
	pfs_reg_t src[3] = { src0, src1, src2 };
	int hwdest, hwsrc[3];
	int argc;
	int v_idx = rp->v_pos, s_idx = rp->s_pos;
	GLuint inst[4] = { 0, 0, 0, 0 };
	int i;

	if (!dest.valid || !src0.valid || !src1.valid || !src2.valid) {
		ERROR("invalid register. dest/src0/src1/src2 valid = %d/%d/%d/%d\n",
		      dest.valid, src0.valid, src1.valid, src2.valid);
		return;
	}

	/* check opcode (op indexes r300_fpop[], so reject negatives too) */
	if (op < 0 || op > MAX_PFS_OP) {
		ERROR("unknown opcode!\n");
		return;
	}
	argc = r300_fpop[op].argc;

	/* grab hwregs of sources */
	for (i = 0; i < argc; i++) {
		switch (src[i].type) {
		case REG_TYPE_INPUT:
			hwsrc[i] = rp->inputs[src[i].index];
			rp->used_in_node |= (1 << hwsrc[i]);
			break;
		case REG_TYPE_TEMP:
			/* make sure insn ordering is right: a source that crosses
			 * between the vector and scalar units must not be read
			 * before the other stream has produced it */
			if ((src[i].vcross && v_idx < s_idx) ||
			    (src[i].scross && s_idx < v_idx)) {
				sync_streams(rp);
				v_idx = s_idx = rp->v_pos;
			}
			hwsrc[i] = rp->temps[src[i].index];
			rp->used_in_node |= (1 << hwsrc[i]);
			break;
		case REG_TYPE_CONST:
			hwsrc[i] = src[i].index;
			break;
		default:
			ERROR("invalid source reg\n");
			return;
		}
	}

	/* never write past the end of the instruction table */
	if (((mask & WRITEMASK_XYZ) && v_idx >= PFS_MAX_ALU_INST) ||
	    ((mask & WRITEMASK_W) && s_idx >= PFS_MAX_ALU_INST)) {
		ERROR("too many ALU instructions (max %d)", PFS_MAX_ALU_INST);
		return;
	}

	/* grab hwregs of dest */
	switch (dest.type) {
	case REG_TYPE_TEMP:
		hwdest = rp->temps[dest.index];
		rp->dest_in_node |= (1 << hwdest);
		rp->used_in_node |= (1 << hwdest);
		break;
	case REG_TYPE_OUTPUT:
		hwdest = 0;
		break;
	default:
		ERROR("invalid dest reg type %d\n", dest.type);
		return;
	}

	/* encode the three argument slots: swizzle/negate go into words 0
	 * and 2 (7 bits per slot), register address into words 1 and 3
	 * (6 bits per slot, bit 5 selecting the constant file) */
	for (i = 0; i < 3; i++) {
		if (i < argc) {
			inst[0] |= (v_swiz[src[i].v_swz].base + (i * v_swiz[src[i].v_swz].stride)) << (i * 7);
			inst[2] |= (s_swiz[src[i].s_swz].base + (i * s_swiz[src[i].s_swz].stride)) << (i * 7);
			if (src[i].negate) {
				inst[0] |= (1<<5) << (i*7);
				inst[2] |= (1<<5) << (i*7);
			}
			inst[1] |= hwsrc[i] << (i*6);
			inst[3] |= hwsrc[i] << (i*6);
			if (src[i].type == REG_TYPE_CONST) {
				inst[1] |= (1<<5) << (i*6);
				inst[3] |= (1<<5) << (i*6);
			}
		} else {
			/* unused slot: read constant zero, and use a ZERO
			 * swizzle as well */
			inst[0] |= R300_FPI0_ARGC_ZERO << (i*7);
			inst[2] |= R300_FPI2_ARGA_ZERO << (i*7);
			inst[1] |= (1<<5) << (i*6);
			inst[3] |= (1<<5) << (i*6);
		}
	}

	if (mask & WRITEMASK_XYZ) {
		rp->alu.inst[v_idx].inst0 = inst[0] | r300_fpop[op].v_op | flags;
		rp->alu.inst[v_idx].inst1 = inst[1] |
			(hwdest << R300_FPI1_DSTC_SHIFT) |
			((mask & WRITEMASK_XYZ) << (dest.type == REG_TYPE_OUTPUT ? 26 : 23));
		rp->v_pos = v_idx + 1;
	}
	if (mask & WRITEMASK_W) {
		rp->alu.inst[s_idx].inst2 = inst[2] | r300_fpop[op].s_op | flags;
		rp->alu.inst[s_idx].inst3 = inst[3] |
			(hwdest << R300_FPI3_DSTA_SHIFT) |
			(1 << (dest.type == REG_TYPE_OUTPUT ? 24 : 23));
		rp->s_pos = s_idx + 1;
	}
}
static GLboolean parse_program(struct r300_fragment_program *rp)
{
struct fragment_program *mp = &rp->mesa_program;
const struct fp_instruction *inst = mp->Instructions;
struct fp_instruction *fpi;
pfs_reg_t src0, src1, src2, dest, temp;
int flags = 0;
if (!inst || inst[0].Opcode == FP_OPCODE_END) {
ERROR("empty program?\n");
return GL_FALSE;
}
for (fpi=mp->Instructions; fpi->Opcode != FP_OPCODE_END; fpi++) {
if (inst->Saturate) flags = R300_FPI0_OUTC_SAT; /* same for OUTA */
switch (fpi->Opcode) {
case FP_OPCODE_ABS:
ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
break;
case FP_OPCODE_ADD:
emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask,
t_src(rp, fpi->SrcReg[0]),
pfs_one,
t_src(rp, fpi->SrcReg[1]),
flags);
break;
case FP_OPCODE_CMP:
case FP_OPCODE_COS:
ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
break;
case FP_OPCODE_DP3:
case FP_OPCODE_DP4:
case FP_OPCODE_DPH:
case FP_OPCODE_DST:
case FP_OPCODE_EX2:
case FP_OPCODE_FLR:
case FP_OPCODE_FRC:
case FP_OPCODE_KIL:
case FP_OPCODE_LG2:
case FP_OPCODE_LIT:
ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
break;
case FP_OPCODE_LRP:
/* TODO: use the special LRP form if possible */
src0 = t_src(rp, fpi->SrcReg[0]);
src1 = t_src(rp, fpi->SrcReg[1]);
src2 = t_src(rp, fpi->SrcReg[2]);
// result = tmp0tmp1 + (1 - tmp0)tmp2
// = tmp0tmp1 + tmp2 + (-tmp0)tmp2
// MAD temp, -tmp0, tmp2, tmp2
// MAD result, tmp0, tmp1, temp
temp = get_temp_reg(rp);
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZW,
negate(src0), src2, src2, 0);
emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask,
src0, src1, temp, flags);
free_temp(rp, temp);
break;
case FP_OPCODE_MAD:
emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask,
t_src(rp, fpi->SrcReg[0]),
t_src(rp, fpi->SrcReg[1]),
t_src(rp, fpi->SrcReg[2]),
flags);
break;
case FP_OPCODE_MAX:
case FP_OPCODE_MIN:
ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
break;
case FP_OPCODE_MOV:
emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask,
t_src(rp, fpi->SrcReg[0]), pfs_one, pfs_zero,
flags);
break;
case FP_OPCODE_MUL:
emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask,
t_src(rp, fpi->SrcReg[0]),
t_src(rp, fpi->SrcReg[1]),
pfs_zero,
flags);
break;
case FP_OPCODE_POW:
case FP_OPCODE_RCP:
case FP_OPCODE_RSQ:
case FP_OPCODE_SCS:
case FP_OPCODE_SGE:
case FP_OPCODE_SIN:
case FP_OPCODE_SLT:
ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
break;
case FP_OPCODE_SUB:
emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask,
t_src(rp, fpi->SrcReg[0]),
pfs_one,
negate(t_src(rp, fpi->SrcReg[1])),
flags);
break;
case FP_OPCODE_SWZ:
ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
break;
case FP_OPCODE_TEX:
emit_tex(rp, fpi, R300_FPITX_OP_TEX);
break;
case FP_OPCODE_TXB:
emit_tex(rp, fpi, R300_FPITX_OP_TXB);
break;
case FP_OPCODE_TXP:
emit_tex(rp, fpi, R300_FPITX_OP_TXP);
break;
case FP_OPCODE_XPD:
ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
break;
default:
ERROR("unknown fpi->Opcode %d\n", fpi->Opcode);
break;
}
if (rp->error)
return GL_FALSE;
}
return GL_TRUE;
}
/* Prepare rp for a fresh translation:
* - Init structures
* - Determine what hwregs each input corresponds to
* - Give every temporary the Mesa program references a hw register
* - Have Mesa fill in the program's state parameter values
*
* If the program has no instruction list an error is flagged and the
* function returns before the last two steps.
*/
void init_program(struct r300_fragment_program *rp)
{
struct fragment_program *mp = &rp->mesa_program;
struct fp_instruction *fpi;
GLuint InputsRead = mp->InputsRead;
/* next hw register to hand to an input */
GLuint fp_reg = 0;
GLuint temps_used = 0; /* for rp->temps[] */
int i;
/* reset all translation state */
rp->translated = GL_FALSE;
rp->error = GL_FALSE;
rp->v_pos = 0;
rp->s_pos = 0;
rp->tex.length = 0;
/* node 0 starts out empty; *_end == -1 means "nothing emitted yet" */
rp->node[0].alu_offset = 0;
rp->node[0].alu_end = -1;
rp->node[0].tex_offset = 0;
rp->node[0].tex_end = -1;
rp->cur_node = 0;
rp->first_node_has_tex = 0;
rp->used_in_node = 0;
rp->dest_in_node = 0;
rp->param_length = 0;
rp->temp_in_use = 0;
rp->hwreg_in_use = 0;
/* NOTE(review): registers assigned to inputs below do not update
 * max_temp_idx; only get_hw_temp() does - confirm that is intended */
rp->max_temp_idx = 0;
/* Work out what temps the Mesa inputs correspond to, this must match
* what setup_rs_unit does, which shouldn't be a problem as rs_unit
* configures itself based on the fragprog's InputsRead
*/
/* Texcoords come first */
for (i=0;i<rp->ctx->Const.MaxTextureUnits;i++) {
if (InputsRead & (FRAG_BIT_TEX0 << i)) {
rp->hwreg_in_use |= (1<<fp_reg);
rp->inputs[FRAG_ATTRIB_TEX0+i] = fp_reg++;
}
}
InputsRead &= ~FRAG_BITS_TEX_ANY;
/* Then primary colour */
if (InputsRead & FRAG_BIT_COL0) {
rp->hwreg_in_use |= (1<<fp_reg);
rp->inputs[FRAG_ATTRIB_COL0] = fp_reg++;
}
InputsRead &= ~FRAG_BIT_COL0;
/* Anything else */
if (InputsRead) {
WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead);
/* force read from hwreg 0 for now */
for (i=0;i<32;i++)
if (InputsRead & (1<<i)) rp->inputs[i] = 0;
}
/* Possibly the worst part of how I went about this... Find out what
* temps are used by the mesa program so we don't clobber something
* when we need a temp for other reasons.
*
* Possibly not too bad actually, as we could add to this later and
* find out when inputs are last used so we can reuse them as temps.
*/
if (!mp->Instructions) {
ERROR("No instructions found in program\n");
return;
}
for (fpi=mp->Instructions;fpi->Opcode != FP_OPCODE_END; fpi++) {
/* all three source slots are inspected, whatever the opcode */
for (i=0;i<3;i++) {
if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) {
/* first sighting of this temp: back it with a hw register */
if (!(temps_used & (1 << fpi->SrcReg[i].Index))) {
temps_used |= (1 << fpi->SrcReg[i].Index);
rp->temps[fpi->SrcReg[i].Index] = get_hw_temp(rp);
}
}
}
/* needed? surely if a program writes a temp it'll read it again */
if (fpi->DstReg.File == PROGRAM_TEMPORARY) {
if (!(temps_used & (1 << fpi->DstReg.Index))) {
temps_used |= (1 << fpi->DstReg.Index);
rp->temps[fpi->DstReg.Index] = get_hw_temp(rp);
}
}
}
/* from here on get_temp_reg() only hands out indices the program
 * itself does not use */
rp->temp_in_use = temps_used;
/* Ask Mesa nicely to fill in ParameterValues for us */
_mesa_load_state_parameters(rp->ctx, rp->mesa_program.Parameters);
}
void translate_fragment_shader(struct r300_fragment_program *rp)
{
int i;
init_program(rp);
if (parse_program(rp) == GL_FALSE) {
dump_program(rp);
return;
}
/* Finish off */
sync_streams(rp);
rp->node[rp->cur_node].alu_end = rp->v_pos - 1;
rp->alu_offset = 0;
rp->alu_end = rp->v_pos - 1;
rp->tex_offset = 0;
rp->tex_end = rp->tex.length - 1;
rp->translated = GL_TRUE;
if (0) dump_program(rp);
}
/* Debug aid: print the Mesa program, the node layout and the four ALU
 * instruction words of the hardware program to stderr.  A running
 * counter numbers the dumps. */
static void dump_program(struct r300_fragment_program *rp)
{
	static int pc = 0;
	int n;

	fprintf(stderr, "pc=%d*************************************\n", pc++);

	fputs("Mesa program:\n", stderr);
	fputs("-------------\n", stderr);
	_mesa_debug_fp_inst(rp->mesa_program.NumTexInstructions +
			    rp->mesa_program.NumAluInstructions,
			    rp->mesa_program.Instructions);
	fflush(stdout);

	fputs("Hardware program\n", stderr);
	fputs("----------------\n", stderr);

	n = 0;
	while (n <= rp->cur_node) {
		fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, alu_end: %d, tex_end: %d\n", n,
			rp->node[n].alu_offset,
			rp->node[n].tex_offset,
			rp->node[n].alu_end,
			rp->node[n].tex_end);
		n++;
	}

	/* dump program in pretty_print_command_stream.tcl-readable format:
	 * one header word per register block, then that block's words */
	fprintf(stderr, "%08x\n", ((rp->alu_end << 16) | (R300_PFS_INSTR0_0 >> 2)));
	for (n = 0; n <= rp->alu_end; n++) {
		fprintf(stderr, "%08x\n", rp->alu.inst[n].inst0);
	}

	fprintf(stderr, "%08x\n", ((rp->alu_end << 16) | (R300_PFS_INSTR1_0 >> 2)));
	for (n = 0; n <= rp->alu_end; n++) {
		fprintf(stderr, "%08x\n", rp->alu.inst[n].inst1);
	}

	fprintf(stderr, "%08x\n", ((rp->alu_end << 16) | (R300_PFS_INSTR2_0 >> 2)));
	for (n = 0; n <= rp->alu_end; n++) {
		fprintf(stderr, "%08x\n", rp->alu.inst[n].inst2);
	}

	fprintf(stderr, "%08x\n", ((rp->alu_end << 16) | (R300_PFS_INSTR3_0 >> 2)));
	for (n = 0; n <= rp->alu_end; n++) {
		fprintf(stderr, "%08x\n", rp->alu.inst[n].inst3);
	}

	fputs("00000000\n", stderr);
}
#endif // USE_ARB_F_P == 1

View file

@ -0,0 +1,46 @@
#ifndef __R300_FRAGPROG_H_
#define __R300_FRAGPROG_H_
#include "glheader.h"
#include "macros.h"
#include "enums.h"
#include "program.h"
#include "r300_context.h"
#include "nvfragprog.h"
/* Representation of a register for emit_arith/swizzle.
 *
 * Packed into bitfields and passed around by value.
 */
typedef struct _pfs_reg_t {
/* which register file the index refers to */
enum {
REG_TYPE_INPUT,
REG_TYPE_OUTPUT,
REG_TYPE_TEMP,
REG_TYPE_CONST
} type:2;
/* register number within that file */
GLuint index:6;
/* vector (xyz) swizzle: index into v_swiz[] */
GLuint v_swz:5;
/* scalar (w) swizzle: index into s_swiz[] */
GLuint s_swz:5;
/* vector source reads a value produced by the scalar stream; makes
 * emit_arith() sync the streams first when the vector stream is behind */
GLboolean vcross:1;
/* scalar source reads a value produced by the vector stream; same
 * syncing in the other direction */
GLboolean scross:1;
GLuint negate:1; //XXX: we need to handle negate individually
/* set once the register already provides the wanted w component */
GLboolean has_w:1;
/* register may be used; emit_arith() rejects registers without it */
GLboolean valid:1;
} pfs_reg_t;
/* supported hw opcodes */
#define PFS_OP_MAD 0
#define PFS_OP_DP3 1
#define PFS_OP_DP4 2
#define PFS_OP_MIN 3
#define PFS_OP_MAX 4
#define PFS_OP_CMP 5
#define PFS_OP_FRC 6
#define PFS_OP_EX2 7
#define PFS_OP_LG2 8
#define PFS_OP_RCP 9
#define PFS_OP_RSQ 10
#define MAX_PFS_OP 10
#define OP(n) PFS_OP_##n
#endif

View file

@ -823,6 +823,13 @@ I am fairly certain that they are correct unless stated otherwise in comments.
# define R300_FPITX_DST_MASK (31 << 6)
# define R300_FPITX_IMAGE_SHIFT 11
# define R300_FPITX_IMAGE_MASK (15 << 11) /* GUESS based on layout and native limits */
/* Unsure if these are opcodes, or some kind of bitfield, but this is how
* they were set when I checked
*/
# define R300_FPITX_OPCODE_SHIFT 15
# define R300_FPITX_OP_TEX 1
# define R300_FPITX_OP_TXP 3
# define R300_FPITX_OP_TXB 4
/* ALU
// The ALU instructions register blocks are enumerated according to the order
@ -997,13 +1004,13 @@ I am fairly certain that they are correct unless stated otherwise in comments.
# define R300_FPI2_ARG1A_MASK (31 << 7)
# define R300_FPI2_ARG1A_NEG (1 << 12)
# define R300_FPI2_ARG2A_SHIFT 14
# define R300_FPI2_AEG2A_MASK (31 << 14)
# define R300_FPI2_ARG2A_MASK (31 << 14)
# define R300_FPI2_ARG2A_NEG (1 << 19)
# define R300_FPI2_SPECIAL_LRP (1 << 21)
# define R300_FPI2_OUTA_MAD (0 << 23)
# define R300_FPI2_OUTA_DP4 (1 << 23)
# define R300_RPI2_OUTA_MIN (2 << 23)
# define R300_RPI2_OUTA_MAX (3 << 23)
# define R300_FPI2_OUTA_MIN (2 << 23)
# define R300_FPI2_OUTA_MAX (3 << 23)
# define R300_FPI2_OUTA_CMP (6 << 23)
# define R300_FPI2_OUTA_FRC (7 << 23)
# define R300_FPI2_OUTA_EX2 (8 << 23)

View file

@ -5,6 +5,9 @@
#include "program.h"
#include "r300_context.h"
#include "nvvertprog.h"
#if USE_ARB_F_P == 1
#include "r300_fragprog.h"
#endif
static void r300BindProgram(GLcontext *ctx, GLenum target, struct program *prog)
{
@ -13,6 +16,9 @@ static void r300BindProgram(GLcontext *ctx, GLenum target, struct program *prog)
switch(target){
case GL_VERTEX_PROGRAM_ARB:
#if USE_ARB_F_P == 1
case GL_FRAGMENT_PROGRAM_ARB:
#endif
//rmesa->current_vp = vp;
break;
default:
@ -24,7 +30,11 @@ static void r300BindProgram(GLcontext *ctx, GLenum target, struct program *prog)
static struct program *r300NewProgram(GLcontext *ctx, GLenum target, GLuint id)
{
struct r300_vertex_program *vp;
#if USE_ARB_F_P == 1
struct r300_fragment_program *fp;
#else
struct fragment_program *fp;
#endif
struct ati_fragment_shader *afs;
switch(target){
@ -33,9 +43,14 @@ static struct program *r300NewProgram(GLcontext *ctx, GLenum target, GLuint id)
return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
case GL_FRAGMENT_PROGRAM_ARB:
#if USE_ARB_F_P == 1
fp=CALLOC_STRUCT(r300_fragment_program);
fp->ctx = ctx;
return _mesa_init_fragment_program(ctx, &fp->mesa_program, target, id);
#else
fp=CALLOC_STRUCT(fragment_program);
return _mesa_init_fragment_program(ctx, fp, target, id);
#endif
case GL_FRAGMENT_PROGRAM_NV:
fp=CALLOC_STRUCT(fragment_program);
return _mesa_init_fragment_program(ctx, fp, target, id);
@ -64,15 +79,20 @@ void r300ProgramStringNotify(GLcontext *ctx, GLenum target,
struct program *prog)
{
struct r300_vertex_program *vp=(void *)prog;
#if USE_ARB_F_P == 1
struct r300_fragment_program *fp=(void *)prog;
#endif
switch(target) {
case GL_VERTEX_PROGRAM_ARB:
/*vp->translated=GL_FALSE;
translate_vertex_shader(vp);*/
//debug_vp(ctx, vp);
break;
case GL_FRAGMENT_PROGRAM_ARB:
#if USE_ARB_F_P == 1
fp->translated = GL_FALSE;
#endif
break;
}
}

View file

@ -58,10 +58,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_reg.h"
#include "r300_program.h"
#include "r300_emit.h"
#if USE_ARB_F_P == 1
#include "r300_fragprog.h"
#else
#include "r300_fixed_pipelines.h"
#include "r300_texprog.h"
#endif
#include "r300_tex.h"
#include "r300_maos.h"
#include "r300_texprog.h"
static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
{
@ -1094,6 +1098,99 @@ void r300_setup_textures(GLcontext *ctx)
fprintf(stderr, "TX_ENABLE: %08x max_texture_unit=%d\n", r300->hw.txe.cmd[R300_TXE_ENABLE], max_texture_unit);
}
#if USE_ARB_F_P == 1
/**
 * Program the rasteriser routing state (the ri, rc and rr state atoms) so
 * that the attributes produced by the vertex stage reach the inputs the
 * current fragment program reads.
 *
 * The set of available attributes comes from the current vertex shader's
 * OutputsWritten when hardware TCL is on, otherwise from
 * r300->state.render_inputs.  The set of wanted attributes is the current
 * fragment program's InputsRead.  If no fragment program is current the
 * function logs a message and returns without touching any state.
 *
 * Only texture coordinates and the primary colour are routed; any other
 * bit left in InputsRead is reported through WARN_ONCE.
 *
 * \param ctx  GL context whose r300 hardware state is updated.
 */
void r300_setup_rs_unit(GLcontext *ctx)
{
	r300ContextPtr r300 = R300_CONTEXT(ctx);
	int i, vp_reg, fp_reg, in_texcoords;
	/* I'm still unsure if these are needed */
	GLuint interp_magic[8] = {
		0x00,
		0x40,
		0x80,
		0xC0,
		0x00,
		0x00,
		0x00,
		0x00
	};
	GLuint OutputsWritten;
	GLuint InputsRead;

	if (hw_tcl_on)
		OutputsWritten = CURRENT_VERTEX_SHADER(ctx)->OutputsWritten;
	else
		OutputsWritten = r300->state.render_inputs;

	if (ctx->FragmentProgram._Current)
		InputsRead = ctx->FragmentProgram._Current->InputsRead;
	else {
		fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
		return; /* This should only ever happen once.. */
	}

	/* This needs to be rewritten - it is a hack at best */

	R300_STATECHANGE(r300, ri);
	R300_STATECHANGE(r300, rc);
	R300_STATECHANGE(r300, rr);

	/* vp_reg:       next interpolator source supplied by the vertex stage
	 * fp_reg:       next fragment program input register / route slot
	 * in_texcoords: number of texcoord sets the vertex stage provides
	 */
	vp_reg = fp_reg = in_texcoords = 0;
	r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0;

	for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
		/* Non-zero when the vertex stage provides texcoord set i. */
		GLuint tex_written = OutputsWritten &
			(hw_tcl_on ? (1 << (VERT_RESULT_TEX0 + i)) : (_TNL_BIT_TEX0 << i));

		if (tex_written)
			in_texcoords++;

		r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0
			| R300_RS_INTERP_USED
			| (vp_reg << R300_RS_INTERP_SRC_SHIFT)
			| interp_magic[i];

		if (InputsRead & (FRAG_BIT_TEX0 << i)) {
			assert(r300->state.texture.tc_count != 0);
			r300->hw.rr.cmd[R300_RR_ROUTE_0 + fp_reg] = 0
				| R300_RS_ROUTE_ENABLE
				| i /* source INTERP */
				| (fp_reg << R300_RS_ROUTE_DEST_SHIFT);

			if (tex_written) {
				vp_reg++;
			} else {
				/* Unsure of how to handle this situation, for now print errors and
				 * the program will just receive bogus data
				 */
				fprintf(stderr, "fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
			}
			InputsRead &= ~(FRAG_BIT_TEX0 << i);
			fp_reg++;
		}
	}

	if (InputsRead & FRAG_BIT_COL0) {
		if (!(OutputsWritten & (hw_tcl_on ? (1 << VERT_RESULT_COL0) : _TNL_BIT_COLOR0)))
			fprintf(stderr, "fragprog wants col0, vp doesn't provide it\n");

		/* The colour route shares the first route word with texcoord 0. */
		r300->hw.rr.cmd[R300_RR_ROUTE_0] |= 0
			| R300_RS_ROUTE_0_COLOR
			| (fp_reg << R300_RS_ROUTE_0_COLOR_DEST_SHIFT);
		InputsRead &= ~FRAG_BIT_COL0;
	}

	r300->hw.rc.cmd[1] = 0
		| (in_texcoords << R300_RS_CNTL_TC_CNT_SHIFT)
		| R300_RS_CNTL_0_UNKNOWN_7
		| R300_RS_CNTL_0_UNKNOWN_18;

	/* fp_reg counts the texcoord routes written above.  It can be zero
	 * even when texturing is enabled (the program reads no texcoords); in
	 * that case "fp_reg - 1" would be -1 and the route packet would have a
	 * zero count, so fall back to the single-route setup instead.
	 * NOTE(review): the fallback values are the ones already used for the
	 * no-texcoord case; confirm on hardware.
	 */
	if (r300->state.texture.tc_count > 0 && fp_reg > 0) {
		r300->hw.rr.cmd[R300_RR_CMD_0] = cmducs(R300_RS_ROUTE_0, fp_reg);
		r300->hw.rc.cmd[2] = 0xC0 | (fp_reg - 1); /* index of highest RS_ROUTE used*/
	} else {
		r300->hw.rr.cmd[R300_RR_CMD_0] = cmducs(R300_RS_ROUTE_0, 1);
		r300->hw.rc.cmd[2] = 0x0;
	}

	if (InputsRead)
		WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
}
#else
void r300_setup_rs_unit(GLcontext *ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
@ -1120,7 +1217,6 @@ void r300_setup_rs_unit(GLcontext *ctx)
R300_STATECHANGE(r300, rc);
R300_STATECHANGE(r300, rr);
#if 1
cur_reg = 0;
r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0;
@ -1163,48 +1259,8 @@ void r300_setup_rs_unit(GLcontext *ctx)
// fprintf(stderr, "rendering with %d texture co-ordinate sets\n", cur_reg);
#else
for(i = 1; i <= 8; ++i)
r300->hw.ri.cmd[i] = 0x00d10000;
r300->hw.ri.cmd[R300_RI_INTERP_1] |= R300_RS_INTERP_1_UNKNOWN;
r300->hw.ri.cmd[R300_RI_INTERP_2] |= R300_RS_INTERP_2_UNKNOWN;
r300->hw.ri.cmd[R300_RI_INTERP_3] |= R300_RS_INTERP_3_UNKNOWN;
#if 1
for(i = 2; i <= 8; ++i)
r300->hw.ri.cmd[i] |= 4;
#endif
for(i = 1; i <= 8; ++i)
r300->hw.rr.cmd[i] = 0;
/* textures enabled ? */
if(r300->state.texture.tc_count>0){
/* This code only really works with one set of texture coordinates */
/* The second constant is needed to get glxgears display anything .. */
r300->hw.rc.cmd[1] = R300_RS_CNTL_0_UNKNOWN_7
| R300_RS_CNTL_0_UNKNOWN_18
| (r300->state.texture.tc_count<<R300_RS_CNTL_TC_CNT_SHIFT);
r300->hw.rc.cmd[2] = 0xc0;
((drm_r300_cmd_header_t*)r300->hw.rr.cmd)->unchecked_state.count = 1;
r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0x24008;
} else {
/* The second constant is needed to get glxgears display anything .. */
r300->hw.rc.cmd[1] = R300_RS_CNTL_0_UNKNOWN_7 | R300_RS_CNTL_0_UNKNOWN_18;
r300->hw.rc.cmd[2] = 0;
((drm_r300_cmd_header_t*)r300->hw.rr.cmd)->unchecked_state.count = 1;
r300->hw.rr.cmd[R300_RR_ROUTE_0] = 0x4000;
}
#endif
}
#endif // USE_ARB_F_P
#define vpucount(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
@ -1477,7 +1533,109 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
#endif
}
/* Pack a 32-bit float into the 24-bit layout built below: sign in bit 23,
 * a 7-bit exponent with a bias of 63 in bits 22:16, and the top 16 bits of
 * the float's 23-bit mantissa in bits 15:0 (the 7 LSBs are truncated).
 *
 * Values that do not fit are range-checked rather than allowed to corrupt
 * neighbouring fields:
 *   - zero, NaN and magnitudes too small for the exponent field pack as 0
 *   - infinities and magnitudes too large saturate to the largest encoding
 *
 * This is still only lightly tested against real hardware.
 * NOTE(review): saturation assumes exponent field 127 is an ordinary finite
 * exponent in the hardware format - confirm.
 */
unsigned int r300PackFloat24(float f)
{
	float mantissa;
	int exponent = 0;
	unsigned int float24 = 0;

	/* Zero has no normalised form; NaN (f != f) has no known encoding. */
	if (f == 0.0 || f != f)
		return 0;

	/* f == mantissa * 2^exponent with 0.5 <= |mantissa| < 1 for finite f. */
	mantissa = frexpf(f, &exponent);

	/* Handle -ve */
	if (mantissa < 0) {
		float24 |= (1 << 23);
		mantissa = mantissa * -1.0;
	}

	/* frexpf() hands infinities back unchanged and leaves the exponent
	 * unspecified, so catch them before the exponent is touched.
	 */
	if (mantissa >= 1.0)
		return float24 | (127u << 16) | 0xFFFF;

	/* Handle exponent, bias of 63 (frexpf's exponent is one higher than
	 * the IEEE one, hence 62 rather than 63).
	 */
	exponent += 62;

	/* Too small: a negative value must never be shifted into the word. */
	if (exponent < 0)
		return 0;

	/* Too large: would spill into the sign bit, saturate instead. */
	if (exponent > 127)
		return float24 | (127u << 16) | 0xFFFF;

	float24 |= ((unsigned int)exponent << 16);

	/* Kill 7 LSB of mantissa */
	float24 |= (r300PackFloat32(mantissa) & 0x7FFFFF) >> 7;

	return float24;
}
#if USE_ARB_F_P == 1
/**
 * Copy the current fragment program into the fragment shader state atoms
 * (fpt, fpi[0..3], fp and fpp) of the given context.
 *
 * The program is translated with translate_fragment_shader() the first
 * time it is needed.  If it is still not marked as translated afterwards,
 * an error is printed and the process exits.  When no fragment program is
 * current the function returns without touching any state.
 *
 * \param rmesa  r300 context whose hardware state is updated.
 */
void r300SetupPixelShader(r300ContextPtr rmesa)
{
	GLcontext *ctx = rmesa->radeon.glCtx;
	/* Explicit downcast from Mesa's program object to the driver's
	 * wrapper, as r300ProgramStringNotify does for its program pointer.
	 */
	struct r300_fragment_program *rp = (void *)ctx->FragmentProgram._Current;
	int i, k;

	if (!rp) /* should only happen once, just after context is created */
		return;

	if (!rp->translated) {
		translate_fragment_shader(ctx->FragmentProgram._Current);
		if (!rp->translated) {
			fprintf(stderr, "%s: No valid fragment shader, exiting\n", __func__);
			exit(-1);
		}
	}

	/* Texture instructions. */
	R300_STATECHANGE(rmesa, fpt);
	for (i = 0; i < rp->tex.length; i++)
		rmesa->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = rp->tex.inst[i];
	rmesa->hw.fpt.cmd[R300_FPT_CMD_0] = cmducs(R300_PFS_TEXI_0, rp->tex.length);

	/* Each ALU instruction is split over four words (inst0..inst3), each
	 * stored in its own state atom.  Copy word "field" of instructions
	 * 0..alu_end into atom "st" and set the packet header for "reg".
	 */
#define OUTPUT_FIELD(st, reg, field) \
	do { \
		R300_STATECHANGE(rmesa, st); \
		for (i = 0; i <= rp->alu_end; i++) \
			rmesa->hw.st.cmd[R300_FPI_INSTR_0 + i] = rp->alu.inst[i].field; \
		rmesa->hw.st.cmd[R300_FPI_CMD_0] = cmducs(reg, rp->alu_end + 1); \
	} while (0)

	OUTPUT_FIELD(fpi[0], R300_PFS_INSTR0_0, inst0);
	OUTPUT_FIELD(fpi[1], R300_PFS_INSTR1_0, inst1);
	OUTPUT_FIELD(fpi[2], R300_PFS_INSTR2_0, inst2);
	OUTPUT_FIELD(fpi[3], R300_PFS_INSTR3_0, inst3);
#undef OUTPUT_FIELD

	R300_STATECHANGE(rmesa, fp);
	/* I just want to say, the way these nodes are stored.. weird.. */
	/* The cur_node+1 nodes in use are packed towards the end of the four
	 * node slots, so the final node always lands in slot 3; the unused
	 * leading slots are cleared.
	 */
	for (i = 0, k = (4 - (rp->cur_node + 1)); i < 4; i++, k++) {
		if (i < (rp->cur_node + 1)) {
			rmesa->hw.fp.cmd[R300_FP_NODE0 + k] =
				(rp->node[i].alu_offset << R300_PFS_NODE_ALU_OFFSET_SHIFT)
				| (rp->node[i].alu_end << R300_PFS_NODE_ALU_END_SHIFT)
				| (rp->node[i].tex_offset << R300_PFS_NODE_TEX_OFFSET_SHIFT)
				| (rp->node[i].tex_end << R300_PFS_NODE_TEX_END_SHIFT)
				| ((k == 3) ? R300_PFS_NODE_LAST_NODE : 0);
		} else {
			rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0;
		}
	}

	/* PFS_CNTL_0 */
	rmesa->hw.fp.cmd[R300_FP_CNTL0] =
		rp->cur_node
		| (rp->first_node_has_tex << 3);
	/* PFS_CNTL_1 */
	rmesa->hw.fp.cmd[R300_FP_CNTL1] = rp->max_temp_idx;
	/* PFS_CNTL_2 */
	rmesa->hw.fp.cmd[R300_FP_CNTL2] =
		(rp->alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
		| (rp->alu_end << R300_PFS_CNTL_ALU_END_SHIFT)
		| (rp->tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
		| (rp->tex_end << R300_PFS_CNTL_TEX_END_SHIFT);

	/* Program constants: four components per parameter, each packed with
	 * r300PackFloat24().
	 */
	R300_STATECHANGE(rmesa, fpp);
	for (i = 0; i < rp->param_length; i++) {
		rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4*i + 0] = r300PackFloat24(rp->param[i].x);
		rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4*i + 1] = r300PackFloat24(rp->param[i].y);
		rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4*i + 2] = r300PackFloat24(rp->param[i].z);
		rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4*i + 3] = r300PackFloat24(rp->param[i].w);
	}
	rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmducs(R300_PFS_PARAM_0_X, rp->param_length * 4);
}
#else
/* just a skeleton for now.. */
void r300GenerateTexturePixelShader(r300ContextPtr r300)
{
@ -1652,6 +1810,7 @@ int i,k;
rmesa->hw.fpp.cmd[R300_FPP_CMD_0]=cmducs(R300_PFS_PARAM_0_X, rmesa->state.pixel_shader.param_length);
}
#endif
/**
* Called by Mesa after an internal state update.
@ -2118,3 +2277,4 @@ void r300InitStateFuncs(struct dd_function_table* functions)
functions->PolygonOffset = r300PolygonOffset;
functions->PolygonMode = r300PolygonMode;
}

View file

@ -17,6 +17,7 @@
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "r300_context.h"
#if USE_ARB_F_P == 0
#include "r300_ioctl.h"
#include "r300_state.h"
#include "r300_reg.h"
@ -265,4 +266,5 @@ void r300GenerateTextureFragmentShader(r300ContextPtr r300)
p->alu_end = ps->program.alu.length - 1;
p->alu_offset = 0;
}
#endif // USE_ARB_F_P == 0