mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-21 23:50:22 +01:00
Kill all the current shader code.
This commit is contained in:
parent
7d907ef69c
commit
a05db7f505
4 changed files with 0 additions and 1389 deletions
|
|
@ -1,91 +0,0 @@
|
|||
#include "glheader.h"
|
||||
#include "macros.h"
|
||||
#include "enums.h"
|
||||
|
||||
#include "program.h"
|
||||
#include "nouveau_context.h"
|
||||
#include "nouveau_shader.h"
|
||||
|
||||
static struct program *
|
||||
nv40NewProgram(GLcontext *ctx, GLenum target, GLuint id)
|
||||
{
|
||||
}
|
||||
|
||||
/* Stub for the ctx->Driver.BindProgram hook; currently does nothing. */
static void
|
||||
nv40BindProgram(GLcontext *ctx, GLenum target, struct program *prog)
|
||||
{
|
||||
}
|
||||
|
||||
/* Stub for the ctx->Driver.DeleteProgram hook; currently does nothing. */
static void
|
||||
nv40DeleteProgram(GLcontext *ctx, struct program *prog)
|
||||
{
|
||||
}
|
||||
|
||||
/* Stub for the ctx->Driver.ProgramStringNotify hook; currently does nothing. */
static void
|
||||
nv40ProgramStringNotify(GLcontext *ctx, GLenum target,
|
||||
struct program *prog)
|
||||
{
|
||||
}
|
||||
|
||||
static GLboolean
|
||||
nv40IsProgramNative(GLcontext *ctx, GLenum target, struct program *prog)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
nouveauInitShaderFuncs(GLcontext *ctx)
|
||||
{
|
||||
struct nouveau_context *nmesa = NOUVEAU_CONTEXT(ctx);
|
||||
|
||||
if (nmesa->screen->card_type == NV_40) {
|
||||
ctx->Driver.NewProgram = nv40NewProgram;
|
||||
ctx->Driver.BindProgram = nv40BindProgram;
|
||||
ctx->Driver.DeleteProgram = nv40DeleteProgram;
|
||||
ctx->Driver.ProgramStringNotify = nv40ProgramStringNotify;
|
||||
ctx->Driver.IsProgramNative = nv40IsProgramNative;
|
||||
}
|
||||
}
|
||||
|
||||
/* Number of bits stored in one word (one long) of a bit record. */
#define LONGBITS (sizeof(long) * 8)

/*
 * Set bit number 'bit' in the bit record 'rec'.
 *
 * 'rec' must hold at least bit/LONGBITS + 1 words and 'bit' must be
 * non-negative.
 */
void
nvsBitSet(long *rec, int bit)
{
	size_t word  = (size_t)bit / LONGBITS;
	size_t shift = (size_t)bit % LONGBITS;

	/* Shift an unsigned long, not an int: 'shift' may be 32..63. */
	rec[word] = (long)((unsigned long)rec[word] | (1UL << shift));
}

/*
 * Clear bit number 'bit' in the bit record 'rec'.
 *
 * Same requirements on 'rec' and 'bit' as nvsBitSet().
 */
void
nvsBitClear(long *rec, int bit)
{
	size_t word  = (size_t)bit / LONGBITS;
	size_t shift = (size_t)bit % LONGBITS;

	rec[word] = (long)((unsigned long)rec[word] & ~(1UL << shift));
}

/*
 * Allocate a zeroed bit record able to hold 'max' bits and store it in
 * '*rec'.  '*rec' is NULL if the allocation fails.
 */
void
nvsRecInit(long **rec, int max)
{
	size_t words = ((size_t)max + LONGBITS - 1) / LONGBITS;

	*rec = calloc(words, sizeof(long));
}

/*
 * Find the lowest clear bit among the first 'max' bits of 'rec', mark
 * it as set and return its index.
 *
 * Returns -1 when all 'max' bits are already set.  The returned index
 * is relative to the whole record (not to the word it was found in)
 * and is always below 'max'.
 */
int
nvsAllocIndex(long *rec, int max)
{
	int bit;

	for (bit = 0; bit < max; bit++) {
		size_t        word = (size_t)bit / LONGBITS;
		unsigned long mask = 1UL << ((size_t)bit % LONGBITS);

		if (!((unsigned long)rec[word] & mask)) {
			rec[word] = (long)((unsigned long)rec[word] | mask);
			return bit;
		}
	}

	return -1;
}
|
||||
|
||||
|
|
@ -1,74 +0,0 @@
|
|||
#ifndef __NOUVEAU_SHADER_H__
|
||||
#define __NOUVEAU_SHADER_H__
|
||||
|
||||
typedef struct _nouveau_regrec nouveau_regrec;
|
||||
typedef struct _nouveau_srcreg nouveau_srcreg;
|
||||
typedef struct _nouveau_dstreg nouveau_dstreg;
|
||||
typedef struct _nouveau_vertex_program nouveau_vertex_program;
|
||||
|
||||
/* Instruction flags, used by emit_arith functions */
|
||||
#define NOUVEAU_OUT_ABS (1 << 0)
|
||||
#define NOUVEAU_OUT_SAT (1 << 1)
|
||||
|
||||
/* Register file that a nouveau_regrec refers to. */
typedef enum {
|
||||
UNKNOWN = 0,
|
||||
HW_TEMP,
|
||||
HW_INPUT,
|
||||
HW_CONST,
|
||||
HW_OUTPUT
|
||||
} nouveau_regtype;
|
||||
|
||||
/* To track a hardware register's state */
|
||||
struct _nouveau_regrec {
|
||||
/* register file this record describes */
nouveau_regtype file;
|
||||
/* hardware register index; -1 while no hardware temp is assigned */
int hw_id;
|
||||
/* use counter: decremented on each read/write, the hardware temp is
 * released when it reaches 0 (initialised to -1 for fresh temps) */
int ref;
|
||||
};
|
||||
|
||||
/* Description of one instruction source operand. */
struct _nouveau_srcreg {
|
||||
/* backing register record; NULL means "read hardware input 'idx' directly" */
nouveau_regrec *hw;
|
||||
/* register index (into the temps array for temps, input slot otherwise) */
int idx;
|
||||
|
||||
/* non-zero to negate the whole source */
int negate;
|
||||
/* packed 4x2-bit swizzle; 0x1B is the identity XYZW */
int swizzle;
|
||||
};
|
||||
|
||||
/* Description of one instruction destination operand. */
struct _nouveau_dstreg {
|
||||
/* backing temp record; NULL means the write goes to a result register */
nouveau_regrec *hw;
|
||||
/* temp index, or result register index when hw is NULL */
int idx;
|
||||
|
||||
/* hardware write mask, as produced by t_dst_mask() */
int mask;
|
||||
|
||||
/* condup: non-zero to update a condition register;
 * condreg: non-zero to select the second condition register */
int condup, condreg;
|
||||
/* condition test (NV40_VP_INST_COND_*); COND_TR means "always" */
int condtest;
|
||||
/* packed swizzle applied to the condition register; 0x1B is XYZW */
int condswz;
|
||||
};
|
||||
|
||||
/* Driver-side vertex program state wrapped around Mesa's program object. */
struct _nouveau_vertex_program {
|
||||
struct vertex_program mesa_program; /* must be first! */
|
||||
|
||||
/* Used to convert from Mesa register state to on-hardware state */
|
||||
/* bit record of allocated entries in temps[] */
long *temps_in_use;
|
||||
nouveau_regrec inputs[14];
|
||||
nouveau_regrec temps[64];
|
||||
|
||||
/* bit record of hardware temps that have been written by an instruction */
long *hwtemps_written;
|
||||
/* bit record of hardware temps currently allocated */
long *hwtemps_in_use;
|
||||
|
||||
/* encoded instructions, four 32-bit words each */
unsigned int *insns;
|
||||
/* capacity of insns, counted in instructions */
unsigned int insns_alloced;
|
||||
/* number of instructions stored in insns */
unsigned int inst_count;
|
||||
/* NOTE(review): presumably the instruction id the program is uploaded
 * at / started from -- not used in the visible code, confirm */
unsigned int inst_start;
|
||||
};
|
||||
|
||||
/* Helper functions */
|
||||
void nvsRecInit (long **rec, int max);
|
||||
void nvsBitSet (long *rec, int bit);
|
||||
void nvsBitClear (long *rec, int bit);
|
||||
int nvsAllocIndex(long *rec, int max);
|
||||
|
||||
int nv40TranslateVertexProgram(nouveau_vertex_program *vp);
|
||||
//int nv40TranslateFragmentProgram(nouveau_vertex_program *vp);
|
||||
|
||||
#endif /* __NOUVEAU_SHADER_H__ */
|
||||
|
||||
|
|
@ -1,472 +0,0 @@
|
|||
#ifndef __NV40_REG_H__
|
||||
#define __NV40_REG_H__
|
||||
|
||||
#define NV40_TX 0x00001A00
|
||||
/* Register offset of texture unit n (32 bytes per unit). */
#define NV40_TX_UNIT(n) (0x1A00 + ((n) * 32))
|
||||
/* DWORD 0 - texture address */
|
||||
/* DWORD 1 */
|
||||
# define NV40_TX_MIPMAP_COUNT_SHIFT 20
|
||||
# define NV40_TX_MIPMAP_COUNT_MASK (0xF << 20) /* guess */
|
||||
# define NV40_TX_NPOT (1 << 13) /* also set on RECT, even if POT */
|
||||
# define NV40_TX_RECTANGLE (1 << 14)
|
||||
# define NV40_TX_FORMAT_SHIFT 8
|
||||
# define NV40_TX_FORMAT_MASK (0x1F << 8) /* *bad* guess */
|
||||
# define NV40_TX_FORMAT_L8 0x01
|
||||
# define NV40_TX_FORMAT_A1R5G5B5 0x02
|
||||
# define NV40_TX_FORMAT_A4R4G4B4 0x03
|
||||
# define NV40_TX_FORMAT_R5G6B5 0x04
|
||||
# define NV40_TX_FORMAT_A8R8G8B8 0x05
|
||||
# define NV40_TX_FORMAT_DXT1 0x06
|
||||
# define NV40_TX_FORMAT_DXT3 0x07
|
||||
# define NV40_TX_FORMAT_DXT5 0x08
|
||||
# define NV40_TX_FORMAT_L16 0x14
|
||||
# define NV40_TX_FORMAT_G16R16 0x15 /* possibly wrong */
|
||||
# define NV40_TX_FORMAT_A8L8 0x18 /* possibly wrong */
|
||||
# define NV40_TX_NCOMP_SHIFT 4 /* 2=2D, 3=3D*/
|
||||
# define NV40_TX_NCOMP_MASK (0x3 << 4) /* possibly wrong */
|
||||
# define NV40_TX_CUBIC (1 << 2)
|
||||
/* DWORD 2
|
||||
Need to confirm whether or not "3" is CLAMP or CLAMP_TO_EDGE. Posts around the
|
||||
internet seem to indicate that GL_CLAMP isn't supported on nvidia hardware, and
|
||||
GL_CLAMP_TO_EDGE is used instead.
|
||||
*/
|
||||
# define NV40_TX_WRAP_S_SHIFT 0
|
||||
# define NV40_TX_WRAP_S_MASK (0xF << 0)
|
||||
# define NV40_TX_WRAP_T_SHIFT 8
|
||||
# define NV40_TX_WRAP_T_MASK (0xF << 8)
|
||||
# define NV40_TX_WRAP_R_SHIFT 16
|
||||
# define NV40_TX_WRAP_R_MASK (0xF << 16)
|
||||
# define NV40_TX_REPEAT 1
|
||||
# define NV40_TX_MIRRORED_REPEAT 2
|
||||
# define NV40_TX_CLAMP_TO_EDGE 3
|
||||
# define NV40_TX_CLAMP_TO_BORDER 4
|
||||
# define NV40_TX_CLAMP NV40_TX_CLAMP_TO_EDGE
|
||||
/* DWORD 3 */
|
||||
/* DWORD 4
|
||||
Appears to be related to swizzling of the texture image data into a RGBA value.
|
||||
A lot of uncertainty here...
|
||||
*/
|
||||
# define NV40_TX_S0_X_SHIFT 14
|
||||
# define NV40_TX_S0_Y_SHIFT 12
|
||||
# define NV40_TX_S0_Z_SHIFT 10
|
||||
# define NV40_TX_S0_W_SHIFT 8
|
||||
# define NV40_TX_S0_ZERO 0
|
||||
# define NV40_TX_S0_ONE 1
|
||||
# define NV40_TX_S0_S1 2 /* take value from NV40_TX_S1_* */
|
||||
# define NV40_TX_S1_X_SHIFT 6
|
||||
# define NV40_TX_S1_Y_SHIFT 4
|
||||
# define NV40_TX_S1_Z_SHIFT 2
|
||||
# define NV40_TX_S1_W_SHIFT 0
|
||||
# define NV40_TX_S1_X 3
|
||||
# define NV40_TX_S1_Y 2
|
||||
# define NV40_TX_S1_Z 1
|
||||
# define NV40_TX_S1_W 0
|
||||
/* DWORD 5 */
|
||||
# define NV40_TX_MIN_FILTER_SHIFT 16
|
||||
# define NV40_TX_MIN_FILTER_MASK (0xF << 16)
|
||||
# define NV40_TX_MAG_FILTER_SHIFT 24
|
||||
# define NV40_TX_MAG_FILTER_MASK (0xF << 24)
|
||||
# define NV40_TX_FILTER_NEAREST 1
|
||||
# define NV40_TX_FILTER_LINEAR 2
|
||||
# define NV40_TX_FILTER_NEAREST_MIPMAP_NEAREST 3
|
||||
# define NV40_TX_FILTER_LINEAR_MIPMAP_NEAREST 4
|
||||
# define NV40_TX_FILTER_NEAREST_MIPMAP_LINEAR 5
|
||||
# define NV40_TX_FILTER_LINEAR_MIPMAP_LINEAR 6
|
||||
/* DWORD 6 */
|
||||
# define NV40_TX_WIDTH_SHIFT 16
|
||||
/* unsigned constant: 0xFFFF << 16 does not fit in a signed int */
# define NV40_TX_WIDTH_MASK (0xFFFFU << 16)
|
||||
# define NV40_TX_HEIGHT_SHIFT 0
|
||||
# define NV40_TX_HEIGHT_MASK (0xFFFF << 0)
|
||||
/* DWORD 7 */
|
||||
|
||||
|
||||
#define NV40_TX_DEPTH 0x1840
|
||||
/* Register offset of the depth word for texture unit n (4 bytes per unit). */
#define NV40_TX_DEPTH_UNIT(n) (0x1840 + ((n) * 4))
|
||||
# define NV40_TX_DEPTH_SHIFT 20
|
||||
/* unsigned constant: 0xFFF << 20 does not fit in a signed int */
# define NV40_TX_DEPTH_MASK (0xFFFU << 20)
|
||||
# define NV40_TX_DEPTH_NPOT (1 << 7) /* also set for RECT, even if POT */
|
||||
|
||||
/* Vertex Program upload / control */
|
||||
#define NV40_VP_UPLOAD_FROM_ID 0x1E9C /* The next VP_UPLOAD_INST is uploading instruction <n> (guess..) */
|
||||
#define NV40_VP_PROGRAM_START_ID 0x1EA0 /* Start executing program from instruction <n> */
|
||||
|
||||
/* Vertex programs instruction set
|
||||
*
|
||||
* 128bit opcodes, split into 4 32-bit ones for ease of use.
|
||||
*
|
||||
* Non-native instructions
|
||||
* ABS - MOV + NV40_VP_INST0_DEST_ABS
|
||||
* POW - EX2 + MUL + LG2
|
||||
* SUB - ADD, second source negated
|
||||
* SWZ - MOV
|
||||
* XPD -
|
||||
*
|
||||
* Register access
|
||||
* - Only one INPUT can be accessed per-instruction (move extras into TEMPs)
|
||||
* - Only one CONST can be accessed per-instruction (move extras into TEMPs)
|
||||
*
|
||||
* Relative Addressing
|
||||
* According to the value returned for MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB
|
||||
* there are only two address registers available. The destination in the ARL
|
||||
* instruction is set to TEMP <n> (The temp isn't actually written).
|
||||
*
|
||||
* When using vanilla ARB_v_p, the proprietary driver will squish both the available
|
||||
* ADDRESS regs into the first hardware reg in the X and Y components.
|
||||
*
|
||||
* To use an address reg as an index into consts, the CONST_SRC is set to
|
||||
* (const_base + offset) and INDEX_CONST is set.
|
||||
*
|
||||
* It is similar for inputs, INPUT_SRC is set to the offset value and INDEX_INPUT
|
||||
* is set.
|
||||
*
|
||||
* To access the second address reg use ADDR_REG_SELECT_1. A particular component
|
||||
* of the address regs is selected with ADDR_SWZ.
|
||||
*
|
||||
* Only one address register can be accessed per instruction, but you may use
|
||||
* the address reg as an index into both consts and inputs in the same instruction
|
||||
* as long as the swizzles also match.
|
||||
*
|
||||
* Conditional execution (see NV_vertex_program{2,3} for details)
|
||||
* All instructions appear to be able to modify one of two condition code registers.
|
||||
* This is enabled by setting COND_UPDATE_ENABLE. The second condition register is
|
||||
* updated by setting COND_REG_SELECT_1.
|
||||
*
|
||||
* Conditional execution of an instruction is enabled by setting COND_TEST_ENABLE, and
|
||||
* selecting the condition which will allow the test to pass with COND_{FL,LT,...}.
|
||||
* It is possible to swizzle the values in the condition register, which allows for
|
||||
* testing against an individual component.
|
||||
*
|
||||
* Branching
|
||||
* The BRA/CAL instructions seem to follow a slightly different opcode layout. The
|
||||
* destination instruction ID (IADDR) overlaps SRC2. Instruction ID's seem to be
|
||||
* numbered based on the UPLOAD_FROM_ID FIFO command, and is incremented automatically
|
||||
* on each UPLOAD_INST FIFO command.
|
||||
*
|
||||
* Conditional branching is achieved by using the condition tests described above.
|
||||
* There doesn't appear to be dedicated looping instructions, but this can be done
|
||||
* using a temp reg + conditional branching.
|
||||
*
|
||||
* Subroutines may be uploaded before the main program itself, but the first executed
|
||||
* instruction is determined by the PROGRAM_START_ID FIFO command.
|
||||
*
|
||||
* Texture lookup
|
||||
* TODO
|
||||
*/
|
||||
|
||||
/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */
|
||||
#define NV40_VP_INST0_UNK0 (1 << 30) /* set when writing result regs */
|
||||
#define NV40_VP_INST_COND_UPDATE_ENABLE ((1 << 14)|1<<29) /* unsure about this */
|
||||
#define NV40_VP_INST_INDEX_INPUT (1 << 27) /* Use an address reg as in index into attribs */
|
||||
#define NV40_VP_INST_COND_REG_SELECT_1 (1 << 25)
|
||||
#define NV40_VP_INST_ADDR_REG_SELECT_1 (1 << 24)
|
||||
#define NV40_VP_INST_DEST_TEMP_ABS (1 << 21)
|
||||
#define NV40_VP_INST_DEST_TEMP_SHIFT 15
|
||||
#define NV40_VP_INST_DEST_TEMP_MASK (0x3F << 15)
|
||||
#define NV40_VP_INST_COND_TEST_ENABLE (1 << 13) /* write masking based on condition test */
|
||||
#define NV40_VP_INST_COND_SHIFT 10
|
||||
#define NV40_VP_INST_COND_MASK (0x7 << 10)
|
||||
# define NV40_VP_INST_COND_FL 0
|
||||
# define NV40_VP_INST_COND_LT 1
|
||||
# define NV40_VP_INST_COND_EQ 2
|
||||
# define NV40_VP_INST_COND_LE 3
|
||||
# define NV40_VP_INST_COND_GT 4
|
||||
# define NV40_VP_INST_COND_NE 5
|
||||
# define NV40_VP_INST_COND_GE 6
|
||||
# define NV40_VP_INST_COND_TR 7
|
||||
#define NV40_VP_INST_COND_SWZ_X_SHIFT 8
|
||||
#define NV40_VP_INST_COND_SWZ_X_MASK (3 << 8)
|
||||
#define NV40_VP_INST_COND_SWZ_Y_SHIFT 6
|
||||
#define NV40_VP_INST_COND_SWZ_Y_MASK (3 << 6)
|
||||
#define NV40_VP_INST_COND_SWZ_Z_SHIFT 4
|
||||
#define NV40_VP_INST_COND_SWZ_Z_MASK (3 << 4)
|
||||
#define NV40_VP_INST_COND_SWZ_W_SHIFT 2
|
||||
#define NV40_VP_INST_COND_SWZ_W_MASK (3 << 2)
|
||||
#define NV40_VP_INST_COND_SWZ_ALL_SHIFT 2
|
||||
#define NV40_VP_INST_COND_SWZ_ALL_MASK (0xFF << 2)
|
||||
#define NV40_VP_INST_ADDR_SWZ_SHIFT 0
|
||||
#define NV40_VP_INST_ADDR_SWZ_MASK (0x03 << 0)
|
||||
|
||||
/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */
|
||||
#define NV40_VP_INST_OPCODE_SHIFT 22
|
||||
/* unsigned constant: 0x3FF << 22 does not fit in a signed int */
#define NV40_VP_INST_OPCODE_MASK (0x3FFU << 22)
|
||||
/*TODO: confirm which source slots correspond to the GL sources,
|
||||
* renouveau should be correct in most places though.. Also,
|
||||
* document them here.
|
||||
*/
|
||||
# define NV40_VP_INST_OP_NOP 0x000
|
||||
# define NV40_VP_INST_OP_MOV 0x001
|
||||
# define NV40_VP_INST_OP_MUL 0x002
|
||||
# define NV40_VP_INST_OP_ADD 0x003
|
||||
# define NV40_VP_INST_OP_MAD 0x004
|
||||
# define NV40_VP_INST_OP_DP3 0x005
|
||||
# define NV40_VP_INST_OP_DP4 0x007
|
||||
# define NV40_VP_INST_OP_DPH 0x006
|
||||
# define NV40_VP_INST_OP_DST 0x008
|
||||
# define NV40_VP_INST_OP_MIN 0x009
|
||||
# define NV40_VP_INST_OP_MAX 0x00A
|
||||
# define NV40_VP_INST_OP_SLT 0x00B
|
||||
# define NV40_VP_INST_OP_SGE 0x00C
|
||||
# define NV40_VP_INST_OP_ARL 0x00D
|
||||
# define NV40_VP_INST_OP_FRC 0x00E
|
||||
# define NV40_VP_INST_OP_FLR 0x00F
|
||||
# define NV40_VP_INST_OP_SEQ 0x010
|
||||
# define NV40_VP_INST_OP_SFL 0x011
|
||||
# define NV40_VP_INST_OP_SGT 0x012
|
||||
# define NV40_VP_INST_OP_SLE 0x013
|
||||
# define NV40_VP_INST_OP_SNE 0x014
|
||||
# define NV40_VP_INST_OP_STR 0x015
|
||||
# define NV40_VP_INST_OP_SSG 0x016
|
||||
# define NV40_VP_INST_OP_ARR 0x017
|
||||
# define NV40_VP_INST_OP_ARA 0x018
|
||||
# define NV40_VP_INST_OP_RCP 0x040
|
||||
# define NV40_VP_INST_OP_RCC 0x060
|
||||
# define NV40_VP_INST_OP_RSQ 0x080
|
||||
# define NV40_VP_INST_OP_EXP 0x0A0
|
||||
# define NV40_VP_INST_OP_LOG 0x0C0
|
||||
# define NV40_VP_INST_OP_LIT 0x0E0
|
||||
# define NV40_VP_INST_OP_BRA 0x120
|
||||
# define NV40_VP_INST_OP_CAL 0x160
|
||||
# define NV40_VP_INST_OP_RET 0x180
|
||||
# define NV40_VP_INST_OP_LG2 0x1A0
|
||||
# define NV40_VP_INST_OP_EX2 0x1C0
|
||||
# define NV40_VP_INST_OP_COS 0x200
|
||||
# define NV40_VP_INST_OP_PUSHA 0x260
|
||||
# define NV40_VP_INST_OP_POPA 0x280
|
||||
#define NV40_VP_INST_CONST_SRC_SHIFT 12
|
||||
#define NV40_VP_INST_CONST_SRC_MASK (0xFF << 12)
|
||||
#define NV40_VP_INST_INPUT_SRC_SHIFT 8
|
||||
#define NV40_VP_INST_INPUT_SRC_MASK (0x0F << 8)
|
||||
# define NV40_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */
|
||||
# define NV40_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */
|
||||
# define NV40_VP_INST_IN_NORMAL 2
|
||||
# define NV40_VP_INST_IN_COL0 3 /* Should probably confirm them all thougth */
|
||||
# define NV40_VP_INST_IN_COL1 4
|
||||
# define NV40_VP_INST_IN_FOGC 5
|
||||
# define NV40_VP_INST_IN_TC0 8
|
||||
/* Input slot of texture coordinate set n. */
# define NV40_VP_INST_IN_TC(n) (8+(n))
|
||||
#define NV40_VP_INST_SRC0H_SHIFT 0
|
||||
#define NV40_VP_INST_SRC0H_MASK (0xFF << 0)
|
||||
|
||||
/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */
|
||||
#define NV40_VP_INST_SRC0L_SHIFT 23
|
||||
/* unsigned constant: 0x1FF << 23 does not fit in a signed int */
#define NV40_VP_INST_SRC0L_MASK (0x1FFU << 23)
|
||||
#define NV40_VP_INST_SRC1_SHIFT 6
|
||||
#define NV40_VP_INST_SRC1_MASK (0x1FFFF << 6)
|
||||
#define NV40_VP_INST_SRC2H_SHIFT 0
|
||||
#define NV40_VP_INST_SRC2H_MASK (0x3F << 0)
|
||||
#define NV40_VP_INST_IADDRH_SHIFT 0
|
||||
#define NV40_VP_INST_IADDRH_MASK (0x1F << 0) /* guess, need to test this */
|
||||
#
|
||||
/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */
|
||||
#define NV40_VP_INST_IADDRL_SHIFT 29
|
||||
/* unsigned constant: 7 << 29 does not fit in a signed int */
#define NV40_VP_INST_IADDRL_MASK (7U << 29)
|
||||
#define NV40_VP_INST_SRC2L_SHIFT 21
|
||||
/* unsigned constant: 0x7FF << 21 does not fit in a signed int */
#define NV40_VP_INST_SRC2L_MASK (0x7FFU << 21)
|
||||
/* bits 7-12 seem to always be set to 1 */
|
||||
#define NV40_VP_INST_WRITEMASK_SHIFT 13
|
||||
#define NV40_VP_INST_WRITEMASK_MASK (0xF << 13)
|
||||
# define NV40_VP_INST_WRITEMASK_X (1 << 16)
|
||||
# define NV40_VP_INST_WRITEMASK_Y (1 << 15)
|
||||
# define NV40_VP_INST_WRITEMASK_Z (1 << 14)
|
||||
# define NV40_VP_INST_WRITEMASK_W (1 << 13)
|
||||
#define NV40_VP_INST_DEST_SHIFT 2
|
||||
#define NV40_VP_INST_DEST_MASK (31 << 2)
|
||||
# define NV40_VP_INST_DEST_POS 0
|
||||
# define NV40_VP_INST_DEST_COL0 1
|
||||
# define NV40_VP_INST_DEST_COL1 2
|
||||
# define NV40_VP_INST_DEST_BFC0 3
|
||||
# define NV40_VP_INST_DEST_BFC1 4
|
||||
# define NV40_VP_INST_DEST_FOGC 5
|
||||
# define NV40_VP_INST_DEST_PSZ 6
|
||||
# define NV40_VP_INST_DEST_TC0 7
|
||||
/* Result register of texture coordinate set n. */
# define NV40_VP_INST_DEST_TC(n) (7+(n))
|
||||
# define NV40_VP_INST_DEST_TEMP 0x1F /* see NV40_VP_INST0_* for actual register */
|
||||
#define NV40_VP_INST_INDEX_CONST (1 << 1)
|
||||
#define NV40_VP_INST_UNK_00 (1 << 0) /* appears to be set on the last inst only */
|
||||
|
||||
/* Useful to split the source selection regs into their pieces */
|
||||
#define NV40_VP_SRC0_HIGH_SHIFT 9
|
||||
#define NV40_VP_SRC0_HIGH_MASK 0x0001FE00
|
||||
#define NV40_VP_SRC0_LOW_MASK 0x000001FF
|
||||
#define NV40_VP_SRC2_HIGH_SHIFT 11
|
||||
#define NV40_VP_SRC2_HIGH_MASK 0x0001F800
|
||||
#define NV40_VP_SRC2_LOW_MASK 0x000007FF
|
||||
|
||||
/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */
|
||||
/* Flag bit, OR-ed straight into the source word: bit 16 sits just above
 * the swizzle field (bits 8-15).  The plain value 16 would set bit 4,
 * which lies inside the TEMP_SRC field. */
#define NV40_VP_SRC_NEGATE (1 << 16)
|
||||
#define NV40_VP_SRC_SWZ_X_SHIFT 14
|
||||
#define NV40_VP_SRC_SWZ_X_MASK (3 << 14)
|
||||
#define NV40_VP_SRC_SWZ_Y_SHIFT 12
|
||||
#define NV40_VP_SRC_SWZ_Y_MASK (3 << 12)
|
||||
#define NV40_VP_SRC_SWZ_Z_SHIFT 10
|
||||
#define NV40_VP_SRC_SWZ_Z_MASK (3 << 10)
|
||||
#define NV40_VP_SRC_SWZ_W_SHIFT 8
|
||||
#define NV40_VP_SRC_SWZ_W_MASK (3 << 8)
|
||||
#define NV40_VP_SRC_SWZ_ALL_SHIFT 8
|
||||
#define NV40_VP_SRC_SWZ_ALL_MASK (0xFF << 8)
|
||||
#define NV40_VP_SRC_TEMP_SRC_SHIFT 2
|
||||
#define NV40_VP_SRC_TEMP_SRC_MASK (0x3F << 2)
|
||||
#define NV40_VP_SRC_REG_TYPE_SHIFT 0
|
||||
#define NV40_VP_SRC_REG_TYPE_MASK (3 << 0)
|
||||
# define NV40_VP_SRC_REG_TYPE_UNK0 0
|
||||
# define NV40_VP_SRC_REG_TYPE_TEMP 1
|
||||
# define NV40_VP_SRC_REG_TYPE_INPUT 2
|
||||
# define NV40_VP_SRC_REG_TYPE_CONST 3
|
||||
|
||||
/*
|
||||
-- GF6800GT - PCIID 10de:0045 (rev a1) --
|
||||
|
||||
== Fragment program instruction set
|
||||
Not FIFO commands, uploaded into a memory buffer. The fragment program has
|
||||
always appeared in the same map as the texture image data has. Usually it's
|
||||
the first thing in the map, followed immediately by the textures.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* Each fragment program opcode appears to be comprised of 4 32-bit values.
|
||||
*
|
||||
* 0 - Opcode, output reg/mask, ATTRIB source
|
||||
* 1 - Source 0
|
||||
* 2 - Source 1
|
||||
* 3 - Source 2
|
||||
*
|
||||
* Constants are inserted directly after the instruction that uses them.
|
||||
*
|
||||
* It appears that it's not possible to use two input registers in one
|
||||
* instruction as the input sourcing is done in the instruction dword
|
||||
* and not the source selection dwords. As such instructions such as:
|
||||
*
|
||||
* ADD result.color, fragment.color, fragment.texcoord[0];
|
||||
*
|
||||
* must be split into two MOV's and then an ADD (nvidia does this) but
|
||||
* I'm not sure why it's not just one MOV and then source the second input
|
||||
* in the ADD instruction..
|
||||
*
|
||||
* Negation of the full source is done with NV40_FP_REG_NEGATE, arbitrary
|
||||
* negation requires multiplication with a const.
|
||||
*
|
||||
* Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE
|
||||
* The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO
|
||||
* is implemented simply by not writing to the relevant components of the destination.
|
||||
*
|
||||
* Non-native instructions:
|
||||
* LIT
|
||||
* LRP - MAD+MAD
|
||||
* SUB - ADD, negate second source
|
||||
* RSQ - LG2 + EX2
|
||||
* POW - LG2 + MUL + EX2
|
||||
* SCS - COS + SIN
|
||||
* XPD
|
||||
* DP2 - MUL + ADD
|
||||
*/
|
||||
|
||||
//== Opcode / Destination selection ==
|
||||
#define NV40_FP_OP_PROGRAM_END 0x00000001
|
||||
#define NV40_FP_OP_OUT_RESULT (1 << 0) /* uncertain? and what about depth? */
|
||||
#define NV40_FP_OP_OUT_REG_SHIFT 1
|
||||
#define NV40_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */
|
||||
#define NV40_FP_OP_OUTMASK_SHIFT 9
|
||||
#define NV40_FP_OP_OUTMASK_MASK (0xF << 9)
|
||||
# define NV40_FP_OP_OUT_X (1 << 9)
|
||||
# define NV40_FP_OP_OUT_Y (1 << 10)
|
||||
# define NV40_FP_OP_OUT_Z (1 << 11)
|
||||
# define NV40_FP_OP_OUT_W (1 << 12)
|
||||
/* Uncertain about these, especially the input_src values.. it's possible that
|
||||
* they can be dynamically changed.
|
||||
*/
|
||||
#define NV40_FP_OP_INPUT_SRC_SHIFT 13
|
||||
#define NV40_FP_OP_INPUT_SRC_MASK (15 << 13)
|
||||
# define NV40_FP_OP_INPUT_SRC_POSITION 0x0
|
||||
# define NV40_FP_OP_INPUT_SRC_COL0 0x1
|
||||
# define NV40_FP_OP_INPUT_SRC_COL1 0x2
|
||||
# define NV40_FP_OP_INPUT_SRC_TC0 0x4
|
||||
/* Fragment program input slot of texture coordinate set n. */
# define NV40_FP_OP_INPUT_SRC_TC(n) (0x4 + (n))
|
||||
#define NV40_FP_OP_TEX_UNIT_SHIFT 17
|
||||
#define NV40_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */
|
||||
#define NV40_FP_OP_PRECISION_SHIFT 22
|
||||
#define NV40_FP_OP_PRECISION_MASK (3 << 22)
|
||||
# define NV40_FP_PRECISION_FP32 0
|
||||
# define NV40_FP_PRECISION_FP16 1
|
||||
# define NV40_FP_PRECISION_FX12 2
|
||||
#define NV40_FP_OP_OPCODE_SHIFT 24
|
||||
#define NV40_FP_OP_OPCODE_MASK (0x7F << 24)
|
||||
# define NV40_FP_OP_OPCODE_MOV 0x01
|
||||
# define NV40_FP_OP_OPCODE_MUL 0x02
|
||||
# define NV40_FP_OP_OPCODE_ADD 0x03
|
||||
# define NV40_FP_OP_OPCODE_MAD 0x04
|
||||
# define NV40_FP_OP_OPCODE_DP3 0x05
|
||||
# define NV40_FP_OP_OPCODE_DP4 0x06
|
||||
# define NV40_FP_OP_OPCODE_DST 0x07
|
||||
# define NV40_FP_OP_OPCODE_MIN 0x08
|
||||
# define NV40_FP_OP_OPCODE_MAX 0x09
|
||||
# define NV40_FP_OP_OPCODE_SLT 0x0A
|
||||
# define NV40_FP_OP_OPCODE_SGE 0x0B
|
||||
# define NV40_FP_OP_OPCODE_SLE 0x0C
|
||||
# define NV40_FP_OP_OPCODE_SGT 0x0D
|
||||
# define NV40_FP_OP_OPCODE_SNE 0x0E
|
||||
# define NV40_FP_OP_OPCODE_SEQ 0x0F
|
||||
# define NV40_FP_OP_OPCODE_FRC 0x10
|
||||
# define NV40_FP_OP_OPCODE_FLR 0x11
|
||||
# define NV40_FP_OP_OPCODE_TEX 0x17
|
||||
# define NV40_FP_OP_OPCODE_TXP 0x18
|
||||
# define NV40_FP_OP_OPCODE_RCP 0x1A
|
||||
# define NV40_FP_OP_OPCODE_EX2 0x1C
|
||||
# define NV40_FP_OP_OPCODE_LG2 0x1D
|
||||
# define NV40_FP_OP_OPCODE_COS 0x22
|
||||
# define NV40_FP_OP_OPCODE_SIN 0x23
|
||||
# define NV40_FP_OP_OPCODE_DP2A 0x2E
|
||||
# define NV40_FP_OP_OPCODE_TXB 0x31
|
||||
# define NV40_FP_OP_OPCODE_DIV 0x3A
|
||||
/* unsigned constant: 1 << 31 overflows a signed int */
#define NV40_FP_OP_OUT_SAT (1U << 31)
|
||||
|
||||
/* high order bits of SRC0 */
|
||||
#define NV40_FP_OP_OUT_ABS (1 << 29)
|
||||
#define NV40_FP_OP_COND_SWZ_W_SHIFT 27
|
||||
#define NV40_FP_OP_COND_SWZ_W_MASK (3 << 27)
|
||||
#define NV40_FP_OP_COND_SWZ_Z_SHIFT 25
|
||||
#define NV40_FP_OP_COND_SWZ_Z_MASK (3 << 25)
|
||||
#define NV40_FP_OP_COND_SWZ_Y_SHIFT 23
|
||||
#define NV40_FP_OP_COND_SWZ_Y_MASK (3 << 23)
|
||||
#define NV40_FP_OP_COND_SWZ_X_SHIFT 21
|
||||
#define NV40_FP_OP_COND_SWZ_X_MASK (3 << 21)
|
||||
#define NV40_FP_OP_COND_SWZ_ALL_SHIFT 21
|
||||
#define NV40_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21)
|
||||
#define NV40_FP_OP_COND_SHIFT 18
|
||||
#define NV40_FP_OP_COND_MASK (0x07 << 18)
|
||||
# define NV40_FP_OP_COND_FL 0
|
||||
# define NV40_FP_OP_COND_LT 1
|
||||
# define NV40_FP_OP_COND_EQ 2
|
||||
# define NV40_FP_OP_COND_LE 3
|
||||
# define NV40_FP_OP_COND_GT 4
|
||||
# define NV40_FP_OP_COND_NE 5
|
||||
# define NV40_FP_OP_COND_GE 6
|
||||
# define NV40_FP_OP_COND_TR 7
|
||||
|
||||
/* high order bits of SRC1 */
|
||||
#define NV40_FP_OP_SRC_SCALE_SHIFT 28
|
||||
#define NV40_FP_OP_SRC_SCALE_MASK (3 << 28)
|
||||
|
||||
//== Register selection ==
|
||||
#define NV40_FP_REG_SRC_INPUT (1 << 0) /* uncertain */
|
||||
#define NV40_FP_REG_SRC_CONST (1 << 1)
|
||||
#define NV40_FP_REG_SRC_SHIFT 2 /* uncertain */
|
||||
#define NV40_FP_REG_SRC_MASK (31 << 2)
|
||||
#define NV40_FP_REG_UNK_0 (1 << 8)
|
||||
#define NV40_FP_REG_SWZ_ALL_SHIFT 9
|
||||
#define NV40_FP_REG_SWZ_ALL_MASK (255 << 9)
|
||||
#define NV40_FP_REG_SWZ_X_SHIFT 9
|
||||
#define NV40_FP_REG_SWZ_X_MASK (3 << 9)
|
||||
#define NV40_FP_REG_SWZ_Y_SHIFT 11
|
||||
#define NV40_FP_REG_SWZ_Y_MASK (3 << 11)
|
||||
#define NV40_FP_REG_SWZ_Z_SHIFT 13
|
||||
#define NV40_FP_REG_SWZ_Z_MASK (3 << 13)
|
||||
#define NV40_FP_REG_SWZ_W_SHIFT 15
|
||||
#define NV40_FP_REG_SWZ_W_MASK (3 << 15)
|
||||
# define NV40_FP_SWIZZLE_X 0
|
||||
# define NV40_FP_SWIZZLE_Y 1
|
||||
# define NV40_FP_SWIZZLE_Z 2
|
||||
# define NV40_FP_SWIZZLE_W 3
|
||||
#define NV40_FP_REG_NEGATE (1 << 17)
|
||||
|
||||
#endif
|
||||
|
|
@ -1,752 +0,0 @@
|
|||
#include "glheader.h"
|
||||
#include "macros.h"
|
||||
#include "enums.h"
|
||||
#include "program.h"
|
||||
#include "program_instruction.h"
|
||||
|
||||
#include "nouveau_reg.h"
|
||||
#include "nouveau_shader.h"
|
||||
#include "nouveau_msg.h"
|
||||
|
||||
#include "nv40_reg.h"
|
||||
|
||||
/* TODO:
|
||||
* - Implement support for constants
|
||||
* - Handle SWZ with 0/1 components and partial negate masks
|
||||
* - Handle ARB_position_invariant
|
||||
* - Relative register addressing
|
||||
* - Implement any missing instructions
|
||||
* - Fix scalar instructions (the other "writemask")
|
||||
*/
|
||||
|
||||
static int t_dst_mask(int mask);
|
||||
|
||||
static int
|
||||
alloc_hw_temp(nouveau_vertex_program *vp)
|
||||
{
|
||||
return nvsAllocIndex(vp->hwtemps_in_use, 64);
|
||||
}
|
||||
|
||||
static void
|
||||
free_hw_temp(nouveau_vertex_program *vp, int id)
|
||||
{
|
||||
nvsBitClear(vp->hwtemps_in_use, id);
|
||||
}
|
||||
|
||||
static int
|
||||
alloc_temp(nouveau_vertex_program *vp)
|
||||
{
|
||||
int idx;
|
||||
|
||||
idx = nvsAllocIndex(vp->temps_in_use, 64);
|
||||
if (!idx)
|
||||
return -1;
|
||||
|
||||
vp->temps[idx].file = HW_TEMP;
|
||||
vp->temps[idx].hw_id = -1;
|
||||
vp->temps[idx].ref = -1;
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
static void
|
||||
free_temp(nouveau_vertex_program *vp, nouveau_srcreg *temp)
|
||||
{
|
||||
if (!temp) return;
|
||||
|
||||
if (vp->temps[temp->idx].hw_id != -1)
|
||||
free_hw_temp(vp, vp->temps[temp->idx].hw_id);
|
||||
nvsBitClear(vp->temps_in_use, temp->idx);
|
||||
}
|
||||
|
||||
static void
|
||||
make_srcreg(nouveau_vertex_program *vp,
|
||||
nouveau_srcreg *src,
|
||||
nouveau_regtype type,
|
||||
int id)
|
||||
{
|
||||
switch (type) {
|
||||
case HW_INPUT:
|
||||
src->hw = NULL;
|
||||
src->idx = id;
|
||||
break;
|
||||
case HW_TEMP:
|
||||
src->hw = &vp->temps[id];
|
||||
src->idx = id;
|
||||
break;
|
||||
case HW_CONST:
|
||||
//FIXME: TODO
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
|
||||
src->negate = 0;
|
||||
src->swizzle = 0x1B /* 00011011 - XYZW */;
|
||||
}
|
||||
|
||||
static void
|
||||
make_dstreg(nouveau_vertex_program *vp,
|
||||
nouveau_dstreg *dest,
|
||||
nouveau_regtype type,
|
||||
int id)
|
||||
{
|
||||
if (type == HW_TEMP && id == -1)
|
||||
dest->idx = alloc_temp(vp);
|
||||
else
|
||||
dest->idx = id;
|
||||
switch (type) {
|
||||
case HW_TEMP:
|
||||
dest->idx = id;
|
||||
if (dest->idx == -1)
|
||||
dest->idx = alloc_temp(vp);
|
||||
dest->hw = &vp->temps[dest->idx];
|
||||
break;
|
||||
case HW_OUTPUT:
|
||||
dest->hw = NULL;
|
||||
dest->idx = id;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
|
||||
dest->mask = t_dst_mask(WRITEMASK_XYZW);
|
||||
dest->condup = 0;
|
||||
dest->condreg = 0;
|
||||
dest->condtest = NV40_VP_INST_COND_TR;
|
||||
dest->condswz = 0x1B /* 00011011 - XYZW */;
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
src_to_hw(nouveau_vertex_program *vp, nouveau_srcreg *src,
|
||||
unsigned int *is, unsigned int *cs)
|
||||
{
|
||||
unsigned int hs = 0;
|
||||
|
||||
if (!src) {
|
||||
/* unused sources seem to be INPUT swz XYZW, dont't know if this
|
||||
* actually matters or not...
|
||||
*/
|
||||
hs |= (NV40_VP_SRC_REG_TYPE_INPUT << NV40_VP_SRC_REG_TYPE_SHIFT);
|
||||
hs |= (0x1B << NV40_VP_SRC_SWZ_ALL_SHIFT);
|
||||
return hs;
|
||||
}
|
||||
|
||||
if (!src->hw) { /* this is a forced read from a "real" hardware source */
|
||||
*is = src->idx;
|
||||
hs |= (NV40_VP_SRC_REG_TYPE_INPUT << NV40_VP_SRC_REG_TYPE_SHIFT);
|
||||
} else {
|
||||
switch (src->hw->file) {
|
||||
case HW_INPUT:
|
||||
if (*is != -1) {
|
||||
fprintf(stderr, "multiple inputs detected... not good\n");
|
||||
return;
|
||||
}
|
||||
*is = src->hw->hw_id;
|
||||
hs |= (NV40_VP_SRC_REG_TYPE_INPUT << NV40_VP_SRC_REG_TYPE_SHIFT);
|
||||
break;
|
||||
case HW_CONST:
|
||||
if (*cs != -1) {
|
||||
fprintf(stderr, "multiple consts detected... not good\n");
|
||||
return;
|
||||
}
|
||||
*cs = src->hw->hw_id;
|
||||
hs |= (NV40_VP_SRC_REG_TYPE_CONST << NV40_VP_SRC_REG_TYPE_SHIFT);
|
||||
break;
|
||||
case HW_TEMP:
|
||||
if (src->hw->hw_id == -1) {
|
||||
fprintf(stderr, "read from unwritten temp!\n");
|
||||
return;
|
||||
}
|
||||
hs |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT) |
|
||||
(src->hw->hw_id << NV40_VP_SRC_TEMP_SRC_SHIFT);
|
||||
|
||||
if (--src->hw->ref == 0)
|
||||
free_hw_temp(vp, src->hw->hw_id);
|
||||
}
|
||||
}
|
||||
|
||||
hs |= (src->swizzle << NV40_VP_SRC_SWZ_ALL_SHIFT);
|
||||
if (src->negate)
|
||||
hs |= NV40_VP_SRC_NEGATE;
|
||||
|
||||
return hs;
|
||||
}
|
||||
|
||||
static void
|
||||
instruction_store(nouveau_vertex_program *vp, unsigned int inst[])
|
||||
{
|
||||
if ((vp->inst_count+1) > vp->insns_alloced) {
|
||||
vp->insns = realloc(vp->insns, sizeof(unsigned int) * (vp->inst_count+1) * 4);
|
||||
vp->insns_alloced = vp->inst_count+1;
|
||||
}
|
||||
vp->insns[(vp->inst_count*4) + 0] = inst[0];
|
||||
vp->insns[(vp->inst_count*4) + 1] = inst[1];
|
||||
vp->insns[(vp->inst_count*4) + 2] = inst[2];
|
||||
vp->insns[(vp->inst_count*4) + 3] = inst[3];
|
||||
vp->inst_count++;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_arith(nouveau_vertex_program *vp, int op,
|
||||
nouveau_dstreg *dest,
|
||||
nouveau_srcreg *src0,
|
||||
nouveau_srcreg *src1,
|
||||
nouveau_srcreg *src2,
|
||||
int flags)
|
||||
{
|
||||
nouveau_regrec *hwdest = dest->hw;
|
||||
unsigned int hs0, hs1, hs2;
|
||||
unsigned int hop[4] = { 0, 0, 0, 0 };
|
||||
int insrc = -1, constsrc = -1;
|
||||
|
||||
/* Calculate source reg state */
|
||||
hs0 = src_to_hw(vp, src0, &insrc, &constsrc);
|
||||
hs1 = src_to_hw(vp, src1, &insrc, &constsrc);
|
||||
hs2 = src_to_hw(vp, src2, &insrc, &constsrc);
|
||||
|
||||
/* Append it to the instruction */
|
||||
hop[1] |= (((hs0 & NV40_VP_SRC0_HIGH_MASK) >> NV40_VP_SRC0_HIGH_SHIFT)
|
||||
<< NV40_VP_INST_SRC0H_SHIFT);
|
||||
hop[2] |= ((hs0 & NV40_VP_SRC0_LOW_MASK) << NV40_VP_INST_SRC0L_SHIFT) |
|
||||
(hs1 << NV40_VP_INST_SRC1_SHIFT) |
|
||||
(((hs2 & NV40_VP_SRC2_HIGH_MASK) >> NV40_VP_SRC2_HIGH_SHIFT)
|
||||
<< NV40_VP_INST_SRC2H_SHIFT);
|
||||
hop[3] |= (hs2 & NV40_VP_SRC2_LOW_MASK) << NV40_VP_INST_SRC2L_SHIFT;
|
||||
|
||||
/* bits 127:96 */
|
||||
hop[0] |= (dest->condtest << NV40_VP_INST_COND_SHIFT) |
|
||||
(dest->condswz << NV40_VP_INST_COND_SWZ_ALL_SHIFT);
|
||||
if (dest->condtest != NV40_VP_INST_COND_TR)
|
||||
hop[0] |= NV40_VP_INST_COND_TEST_ENABLE;
|
||||
if (dest->condreg) hop[0] |= NV40_VP_INST_COND_REG_SELECT_1;
|
||||
if (dest->condup ) hop[0] |= NV40_VP_INST_COND_UPDATE_ENABLE;
|
||||
|
||||
if (hwdest == NULL /* write output */)
|
||||
hop[0] |= NV40_VP_INST0_UNK0;
|
||||
else {
|
||||
if (hwdest->hw_id == -1)
|
||||
hwdest->hw_id = alloc_hw_temp(vp);
|
||||
|
||||
hop[0] |= (hwdest->hw_id << NV40_VP_INST_DEST_TEMP_SHIFT);
|
||||
if (flags & NOUVEAU_OUT_ABS)
|
||||
hop[0] |= NV40_VP_INST_DEST_TEMP_ABS;
|
||||
|
||||
nvsBitSet(vp->hwtemps_written, hwdest->hw_id);
|
||||
if (--hwdest->ref == 0)
|
||||
free_hw_temp(vp, hwdest->hw_id);
|
||||
}
|
||||
|
||||
/* bits 95:64 */
|
||||
if (constsrc == -1) constsrc = 0;
|
||||
if (insrc == -1) insrc = 0;
|
||||
|
||||
constsrc &= 0xFF;
|
||||
insrc &= 0x0F;
|
||||
hop[1] |= (op << NV40_VP_INST_OPCODE_SHIFT) |
|
||||
(constsrc << NV40_VP_INST_CONST_SRC_SHIFT) |
|
||||
(insrc << NV40_VP_INST_INPUT_SRC_SHIFT);
|
||||
|
||||
/* bits 31:0 */
|
||||
if (hwdest == NULL) {
|
||||
hop[3] |= (dest->mask | (dest->idx << NV40_VP_INST_DEST_SHIFT));
|
||||
} else {
|
||||
hop[3] |= (dest->mask | (NV40_VP_INST_DEST_TEMP << NV40_VP_INST_DEST_SHIFT));
|
||||
}
|
||||
hop[3] |= (0x3F << 7); /*FIXME: what is this?*/
|
||||
|
||||
printf("0x%08x\n", hop[0]);
|
||||
printf("0x%08x\n", hop[1]);
|
||||
printf("0x%08x\n", hop[2]);
|
||||
printf("0x%08x\n", hop[3]);
|
||||
|
||||
instruction_store(vp, hop);
|
||||
}
|
||||
|
||||
static int
|
||||
t_swizzle(GLuint swz)
|
||||
{
|
||||
int x, y, z, w;
|
||||
x = GET_SWZ(swz, 0);
|
||||
y = GET_SWZ(swz, 1);
|
||||
z = GET_SWZ(swz, 2);
|
||||
w = GET_SWZ(swz, 3);
|
||||
|
||||
if ((x<SWIZZLE_ZERO) &&
|
||||
(y<SWIZZLE_ZERO) &&
|
||||
(z<SWIZZLE_ZERO) &&
|
||||
(w<SWIZZLE_ZERO))
|
||||
return (x << 6) | (y << 4) | (z << 2) | w;
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void
|
||||
t_src_reg(nouveau_vertex_program *vp, struct prog_src_register *src,
|
||||
nouveau_srcreg *ns)
|
||||
{
|
||||
switch (src->File) {
|
||||
case PROGRAM_TEMPORARY:
|
||||
ns->hw = &vp->temps[src->Index];
|
||||
break;
|
||||
case PROGRAM_INPUT:
|
||||
ns->hw = &vp->inputs[src->Index];
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Unhandled source register file!\n");
|
||||
break;
|
||||
}
|
||||
|
||||
ns->swizzle = t_swizzle(src->Swizzle);
|
||||
if ((src->NegateBase != 0xF && src->NegateBase != 0x0) ||
|
||||
ns->swizzle == -1) {
|
||||
WARN_ONCE("Unhandled source swizzle/negate, results will be incorrect\n");
|
||||
ns->swizzle = 0x1B; // 00 01 10 11 - XYZW
|
||||
ns->negate = (src->NegateBase) ? 1 : 0;
|
||||
} else
|
||||
ns->negate = (src->NegateBase) ? 1 : 0;
|
||||
|
||||
}
|
||||
|
||||
static int
|
||||
t_dst_mask(int mask)
|
||||
{
|
||||
int hwmask = 0;
|
||||
|
||||
if (mask & WRITEMASK_X) hwmask |= NV40_VP_INST_WRITEMASK_X;
|
||||
if (mask & WRITEMASK_Y) hwmask |= NV40_VP_INST_WRITEMASK_Y;
|
||||
if (mask & WRITEMASK_Z) hwmask |= NV40_VP_INST_WRITEMASK_Z;
|
||||
if (mask & WRITEMASK_W) hwmask |= NV40_VP_INST_WRITEMASK_W;
|
||||
|
||||
return hwmask;
|
||||
}
|
||||
|
||||
static int
|
||||
t_dst_index(int idx)
|
||||
{
|
||||
int hwidx;
|
||||
|
||||
switch (idx) {
|
||||
case VERT_RESULT_HPOS:
|
||||
return NV40_VP_INST_DEST_POS;
|
||||
case VERT_RESULT_COL0:
|
||||
return NV40_VP_INST_DEST_COL0;
|
||||
case VERT_RESULT_COL1:
|
||||
return NV40_VP_INST_DEST_COL1;
|
||||
case VERT_RESULT_FOGC:
|
||||
return NV40_VP_INST_DEST_FOGC;
|
||||
case VERT_RESULT_TEX0:
|
||||
case VERT_RESULT_TEX1:
|
||||
case VERT_RESULT_TEX2:
|
||||
case VERT_RESULT_TEX3:
|
||||
case VERT_RESULT_TEX4:
|
||||
case VERT_RESULT_TEX5:
|
||||
case VERT_RESULT_TEX6:
|
||||
case VERT_RESULT_TEX7:
|
||||
return NV40_VP_INST_DEST_TC(idx - VERT_RESULT_TEX0);
|
||||
case VERT_RESULT_PSIZ:
|
||||
return NV40_VP_INST_DEST_PSZ;
|
||||
case VERT_RESULT_BFC0:
|
||||
return NV40_VP_INST_DEST_BFC0;
|
||||
case VERT_RESULT_BFC1:
|
||||
return NV40_VP_INST_DEST_BFC1;
|
||||
default:
|
||||
fprintf(stderr, "Unknown result reg index!\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
t_cond_test(GLuint test)
|
||||
{
|
||||
switch(test) {
|
||||
case COND_GT: return NV40_VP_INST_COND_GT;
|
||||
case COND_EQ: return NV40_VP_INST_COND_EQ;
|
||||
case COND_LT: return NV40_VP_INST_COND_LT;
|
||||
case COND_GE: return NV40_VP_INST_COND_GE;
|
||||
case COND_LE: return NV40_VP_INST_COND_LE;
|
||||
case COND_NE: return NV40_VP_INST_COND_NE;
|
||||
case COND_TR: return NV40_VP_INST_COND_TR;
|
||||
case COND_FL: return NV40_VP_INST_COND_FL;
|
||||
default:
|
||||
WARN_ONCE("unknown CondMask!\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Helpers for translate(): convert the Mesa source operands of the
 * current instruction and emit a single hardware instruction.
 *
 * They expand in the caller's scope and expect the names vp, vpi, dest,
 * src0, src1 and src2 to be visible there.  The definitions deliberately
 * do not end in a semicolon, so that "ARITH_1OP(x);" is exactly one
 * statement and remains safe inside an unbraced if/else.
 */

/* One vector source, passed in the src0 slot. */
#define ARITH_1OP(op) do { \
	t_src_reg(vp, &vpi->SrcReg[0], &src0); \
	emit_arith(vp, op, &dest, &src0, NULL, NULL, 0); \
} while(0)

/* One scalar source, passed in the src2 slot. */
#define ARITH_1OP_SCALAR(op) do { \
	t_src_reg(vp, &vpi->SrcReg[0], &src0); \
	emit_arith(vp, op, &dest, NULL, NULL, &src0, 0); \
} while(0)

/* Two sources, passed in the src0 and src1 slots. */
#define ARITH_2OP(op) do { \
	t_src_reg(vp, &vpi->SrcReg[0], &src0); \
	t_src_reg(vp, &vpi->SrcReg[1], &src1); \
	emit_arith(vp, op, &dest, &src0, &src1, NULL, 0); \
} while(0)

/* Three sources, passed in the src0, src1 and src2 slots. */
#define ARITH_3OP(op) do { \
	t_src_reg(vp, &vpi->SrcReg[0], &src0); \
	t_src_reg(vp, &vpi->SrcReg[1], &src1); \
	t_src_reg(vp, &vpi->SrcReg[2], &src2); \
	emit_arith(vp, op, &dest, &src0, &src1, &src2, 0); \
} while(0)
|
||||
|
||||
static int
|
||||
translate(nouveau_vertex_program *vp)
|
||||
{
|
||||
struct vertex_program *mvp = &vp->mesa_program;
|
||||
struct prog_instruction *vpi;
|
||||
|
||||
|
||||
for (vpi=mvp->Base.Instructions; vpi->Opcode!=OPCODE_END; vpi++) {
|
||||
nouveau_srcreg src0, src1, src2, sT0;
|
||||
nouveau_dstreg dest, dT0;
|
||||
|
||||
switch (vpi->DstReg.File) {
|
||||
case PROGRAM_OUTPUT:
|
||||
make_dstreg(vp, &dest, HW_OUTPUT, t_dst_index(vpi->DstReg.Index));
|
||||
break;
|
||||
case PROGRAM_TEMPORARY:
|
||||
make_dstreg(vp, &dest, HW_TEMP, vpi->DstReg.Index);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
dest.mask = t_dst_mask(vpi->DstReg.WriteMask);
|
||||
dest.condtest = t_cond_test(vpi->DstReg.CondMask);
|
||||
dest.condswz = t_swizzle(vpi->DstReg.CondSwizzle);
|
||||
dest.condreg = vpi->DstReg.CondSrc;
|
||||
|
||||
switch (vpi->Opcode) {
|
||||
/* ARB_vertex_program requirements */
|
||||
case OPCODE_ABS:
|
||||
t_src_reg(vp, &vpi->SrcReg[0], &src0);
|
||||
emit_arith(vp, NV40_VP_INST_OP_MOV, &dest,
|
||||
&src0, NULL, NULL,
|
||||
NOUVEAU_OUT_ABS
|
||||
);
|
||||
break;
|
||||
case OPCODE_ADD:
|
||||
t_src_reg(vp, &vpi->SrcReg[0], &src0);
|
||||
t_src_reg(vp, &vpi->SrcReg[1], &src1);
|
||||
emit_arith(vp, NV40_VP_INST_OP_ADD, &dest,
|
||||
&src0, NULL, &src1,
|
||||
0
|
||||
);
|
||||
break;
|
||||
case OPCODE_ARL:
|
||||
break;
|
||||
case OPCODE_DP3:
|
||||
ARITH_2OP(NV40_VP_INST_OP_DP3);
|
||||
break;
|
||||
case OPCODE_DP4:
|
||||
ARITH_2OP(NV40_VP_INST_OP_DP4);
|
||||
break;
|
||||
case OPCODE_DPH:
|
||||
ARITH_2OP(NV40_VP_INST_OP_DPH);
|
||||
break;
|
||||
case OPCODE_DST:
|
||||
ARITH_2OP(NV40_VP_INST_OP_DST);
|
||||
break;
|
||||
case OPCODE_EX2:
|
||||
ARITH_1OP_SCALAR(NV40_VP_INST_OP_EX2);
|
||||
break;
|
||||
case OPCODE_EXP:
|
||||
ARITH_1OP_SCALAR(NV40_VP_INST_OP_EXP);
|
||||
break;
|
||||
case OPCODE_FLR:
|
||||
ARITH_1OP(NV40_VP_INST_OP_FLR);
|
||||
break;
|
||||
case OPCODE_FRC:
|
||||
ARITH_1OP(NV40_VP_INST_OP_FRC);
|
||||
break;
|
||||
case OPCODE_LG2:
|
||||
ARITH_1OP_SCALAR(NV40_VP_INST_OP_LG2);
|
||||
break;
|
||||
case OPCODE_LIT:
|
||||
t_src_reg(vp, &vpi->SrcReg[0], &src0);
|
||||
t_src_reg(vp, &vpi->SrcReg[1], &src1);
|
||||
t_src_reg(vp, &vpi->SrcReg[2], &src2);
|
||||
emit_arith(vp, NV40_VP_INST_OP_LIT, &dest,
|
||||
&src0, &src1, &src2,
|
||||
0
|
||||
);
|
||||
break;
|
||||
case OPCODE_LOG:
|
||||
ARITH_1OP_SCALAR(NV40_VP_INST_OP_LOG);
|
||||
break;
|
||||
case OPCODE_MAD:
|
||||
ARITH_3OP(NV40_VP_INST_OP_MAD);
|
||||
break;
|
||||
case OPCODE_MAX:
|
||||
ARITH_2OP(NV40_VP_INST_OP_MAX);
|
||||
break;
|
||||
case OPCODE_MIN:
|
||||
ARITH_2OP(NV40_VP_INST_OP_MIN);
|
||||
break;
|
||||
case OPCODE_MOV:
|
||||
ARITH_1OP(NV40_VP_INST_OP_MOV);
|
||||
break;
|
||||
case OPCODE_MUL:
|
||||
ARITH_2OP(NV40_VP_INST_OP_MOV);
|
||||
break;
|
||||
case OPCODE_POW:
|
||||
t_src_reg(vp, &vpi->SrcReg[0], &src0);
|
||||
t_src_reg(vp, &vpi->SrcReg[1], &src1);
|
||||
make_dstreg(vp, &dT0, HW_TEMP, -1);
|
||||
make_srcreg(vp, &sT0, HW_TEMP, dT0.idx);
|
||||
|
||||
dT0.mask = t_dst_mask(WRITEMASK_X);
|
||||
emit_arith(vp, NV40_VP_INST_OP_LG2, &dT0,
|
||||
NULL, NULL, &src0,
|
||||
0);
|
||||
sT0.swizzle = 0x0; /* 00000000 - XXXX */
|
||||
emit_arith(vp, NV40_VP_INST_OP_MUL, &dT0,
|
||||
&sT0, &src1, NULL,
|
||||
0);
|
||||
emit_arith(vp, NV40_VP_INST_OP_EX2, &dest,
|
||||
NULL, NULL, &sT0,
|
||||
0);
|
||||
break;
|
||||
case OPCODE_RCP:
|
||||
ARITH_1OP_SCALAR(NV40_VP_INST_OP_RCP);
|
||||
break;
|
||||
case OPCODE_RSQ:
|
||||
ARITH_1OP_SCALAR(NV40_VP_INST_OP_RSQ);
|
||||
break;
|
||||
case OPCODE_SGE:
|
||||
ARITH_2OP(NV40_VP_INST_OP_SGE);
|
||||
break;
|
||||
case OPCODE_SLT:
|
||||
ARITH_2OP(NV40_VP_INST_OP_SLT);
|
||||
break;
|
||||
case OPCODE_SUB:
|
||||
t_src_reg(vp, &vpi->SrcReg[0], &src0);
|
||||
t_src_reg(vp, &vpi->SrcReg[1], &src1);
|
||||
src1.negate = !src1.negate;
|
||||
|
||||
emit_arith(vp, NV40_VP_INST_OP_ADD, &dest,
|
||||
&src0, NULL, &src1,
|
||||
0
|
||||
);
|
||||
break;
|
||||
case OPCODE_SWZ:
|
||||
ARITH_1OP(NV40_VP_INST_OP_MOV);
|
||||
break;
|
||||
|
||||
case OPCODE_XPD:
|
||||
break;
|
||||
/* NV_vertex_program3 requirements */
|
||||
case OPCODE_SEQ:
|
||||
ARITH_2OP(NV40_VP_INST_OP_SEQ);
|
||||
break;
|
||||
case OPCODE_SFL:
|
||||
ARITH_2OP(NV40_VP_INST_OP_SFL);
|
||||
break;
|
||||
case OPCODE_SGT:
|
||||
ARITH_2OP(NV40_VP_INST_OP_SGT);
|
||||
break;
|
||||
case OPCODE_SLE:
|
||||
ARITH_2OP(NV40_VP_INST_OP_SLE);
|
||||
break;
|
||||
case OPCODE_SNE:
|
||||
ARITH_2OP(NV40_VP_INST_OP_SNE);
|
||||
break;
|
||||
case OPCODE_STR:
|
||||
ARITH_2OP(NV40_VP_INST_OP_STR);
|
||||
break;
|
||||
case OPCODE_SSG:
|
||||
ARITH_1OP(NV40_VP_INST_OP_SSG);
|
||||
break;
|
||||
case OPCODE_ARL_NV:
|
||||
break;
|
||||
case OPCODE_ARR:
|
||||
break;
|
||||
case OPCODE_ARA:
|
||||
break;
|
||||
case OPCODE_RCC:
|
||||
ARITH_1OP_SCALAR(NV40_VP_INST_OP_SSG);
|
||||
break;
|
||||
case OPCODE_BRA:
|
||||
break;
|
||||
case OPCODE_CAL:
|
||||
break;
|
||||
case OPCODE_RET:
|
||||
break;
|
||||
case OPCODE_PUSHA:
|
||||
break;
|
||||
case OPCODE_POPA:
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Pre-init vertex program
|
||||
* - Grab reference counts on temps
|
||||
* - Where multiple inputs are used in a single instruction,
|
||||
* emit instructions to move the extras into temps
|
||||
*/
|
||||
static int
|
||||
init(nouveau_vertex_program *vp)
|
||||
{
|
||||
struct vertex_program *mvp = &vp->mesa_program;
|
||||
struct prog_instruction *vpi;
|
||||
int i;
|
||||
|
||||
nvsRecInit(&vp->temps_in_use, 64);
|
||||
nvsRecInit(&vp->hwtemps_written, 64);
|
||||
nvsRecInit(&vp->hwtemps_in_use , 64);
|
||||
|
||||
for (vpi=mvp->Base.Instructions; vpi->Opcode!=OPCODE_END; vpi++) {
|
||||
int in_done = 0;
|
||||
int in_idx;
|
||||
|
||||
for (i=0;i<3;i++) {
|
||||
struct prog_src_register *src = &vpi->SrcReg[i];
|
||||
/*FIXME: does not handle relative addressing!*/
|
||||
int idx = src->Index;
|
||||
|
||||
switch (src->File) {
|
||||
case PROGRAM_TEMPORARY:
|
||||
vp->temps[idx].file = HW_TEMP;
|
||||
vp->temps[idx].hw_id = -1;
|
||||
vp->temps[idx].ref++;
|
||||
nvsBitSet(vp->temps_in_use, idx);
|
||||
break;
|
||||
case PROGRAM_INPUT:
|
||||
if (vp->inputs[idx].file == HW_TEMP) {
|
||||
vp->inputs[idx].ref++;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!in_done || (in_idx == idx)) {
|
||||
vp->inputs[idx].file = HW_INPUT;
|
||||
vp->inputs[idx].hw_id = idx;
|
||||
vp->inputs[idx].ref++;
|
||||
in_done = 1;
|
||||
in_idx = idx;
|
||||
} else {
|
||||
vp->inputs[idx].file = HW_TEMP;
|
||||
vp->inputs[idx].ref++;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
switch (vpi->DstReg.File) {
|
||||
case PROGRAM_TEMPORARY:
|
||||
vp->temps[vpi->DstReg.Index].file = HW_TEMP;
|
||||
vp->temps[vpi->DstReg.Index].hw_id = -1;
|
||||
vp->temps[vpi->DstReg.Index].ref++;
|
||||
nvsBitSet(vp->temps_in_use, vpi->DstReg.Index);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now we can move any inputs that need it into temps */
|
||||
for (i=0; i<14; i++) {
|
||||
if (vp->inputs[i].file == HW_TEMP) {
|
||||
nouveau_srcreg src;
|
||||
nouveau_dstreg dest;
|
||||
|
||||
make_dstreg(vp, &dest, HW_TEMP , -1);
|
||||
make_srcreg(vp, &src , HW_INPUT, i);
|
||||
|
||||
emit_arith(vp, NV40_VP_INST_OP_MOV, &dest,
|
||||
&src, NULL, NULL,
|
||||
0
|
||||
);
|
||||
|
||||
vp->inputs[i].file = HW_TEMP;
|
||||
vp->inputs[i].hw_id = dest.hw->hw_id;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
nv40TranslateVertexProgram(nouveau_vertex_program *vp)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = init(vp);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = translate(vp);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
nouveau_vertex_program *vp = calloc(1, sizeof(nouveau_vertex_program));
|
||||
struct vertex_program *mvp = &vp->mesa_program;
|
||||
struct prog_instruction inst[3];
|
||||
|
||||
/*
|
||||
"ADD t0, vertex.color, vertex.position;\n"
|
||||
"ADD result.position, t0, vertex.position;\n"
|
||||
*/
|
||||
|
||||
inst[0].Opcode = OPCODE_ADD;
|
||||
inst[0].SrcReg[0].File = PROGRAM_INPUT;
|
||||
inst[0].SrcReg[0].Index = VERT_ATTRIB_COLOR0;
|
||||
inst[0].SrcReg[0].NegateBase = 0;
|
||||
inst[0].SrcReg[0].Swizzle = MAKE_SWIZZLE4(0, 1, 2, 3);
|
||||
inst[0].SrcReg[1].File = PROGRAM_INPUT;
|
||||
inst[0].SrcReg[1].Index = VERT_ATTRIB_POS;
|
||||
inst[0].SrcReg[1].NegateBase = 0;
|
||||
inst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(0, 1, 2, 3);
|
||||
inst[0].SrcReg[2].File = PROGRAM_UNDEFINED;
|
||||
inst[0].DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst[0].DstReg.Index = 0;
|
||||
inst[0].DstReg.WriteMask = WRITEMASK_XYZW;
|
||||
inst[0].DstReg.CondMask = COND_TR;
|
||||
inst[0].DstReg.CondSwizzle = MAKE_SWIZZLE4(0, 1, 2, 3);
|
||||
inst[0].DstReg.CondSrc = 0;
|
||||
inst[0].CondUpdate = 0;
|
||||
inst[0].CondDst = 0;
|
||||
|
||||
inst[1].Opcode = OPCODE_ADD;
|
||||
inst[1].SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
inst[1].SrcReg[0].Index = 0;
|
||||
inst[1].SrcReg[0].NegateBase = 0;
|
||||
inst[1].SrcReg[0].Swizzle = MAKE_SWIZZLE4(0, 1, 2, 3);
|
||||
inst[1].SrcReg[1].File = PROGRAM_INPUT;
|
||||
inst[1].SrcReg[1].Index = VERT_ATTRIB_POS;
|
||||
inst[1].SrcReg[1].NegateBase = 0;
|
||||
inst[1].SrcReg[1].Swizzle = MAKE_SWIZZLE4(0, 1, 2, 3);
|
||||
inst[0].SrcReg[2].File = PROGRAM_UNDEFINED;
|
||||
inst[1].DstReg.File = PROGRAM_OUTPUT;
|
||||
inst[1].DstReg.Index = VERT_RESULT_HPOS;
|
||||
inst[1].DstReg.WriteMask = WRITEMASK_XYZW;
|
||||
inst[1].DstReg.CondMask = COND_TR;
|
||||
inst[1].DstReg.CondSwizzle = MAKE_SWIZZLE4(0, 1, 2, 3);
|
||||
inst[1].DstReg.CondSrc = 0;
|
||||
inst[1].CondUpdate = 0;
|
||||
inst[1].CondDst = 0;
|
||||
|
||||
inst[2].Opcode = OPCODE_END;
|
||||
|
||||
mvp->Base.Instructions = inst;
|
||||
|
||||
nv40TranslateVertexProgram(vp);
|
||||
}
|
||||
|
||||
Loading…
Add table
Reference in a new issue