Amusing utility to print ARB_fp programs as C code

This commit is contained in:
Keith Whitwell 2004-03-30 11:12:37 +00:00
parent d1c38f7472
commit 257c085db9

View file

@ -0,0 +1,687 @@
/*
* Mesa 3-D graphics library
* Version: 6.1
*
* Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/* An amusing little utility to print ARB fragment programs out as a C
* function. Resulting code not tested except visually.
*/
#include "glheader.h"
#include "colormac.h"
#include "context.h"
#include "nvfragprog.h"
#include "macros.h"
#include "program.h"
#include "s_nvfragprog.h"
#include "s_span.h"
#include "s_texture.h"
/* UREG - a way of representing an FP source register, including
* swizzling and negation, in a single GLuint. The major flaw is the
* limitation to source->Index < 32. A secondary flaw is that it is
* overkill: we could probably just pass around the original
* datatypes instead.
*/
/* Register-file tags held in the type field of a ureg (three bits
 * starting at bit UREG_TYPE_SHIFT).
 */
#define UREG_TYPE_TEMP 0
#define UREG_TYPE_INTERP 1
#define UREG_TYPE_LOCAL_CONST 2
#define UREG_TYPE_ENV_CONST 3
#define UREG_TYPE_STATE_CONST 4
#define UREG_TYPE_PARAM 5
#define UREG_TYPE_OUTPUT 6
#define UREG_TYPE_MASK 0x7
#define UREG_TYPE_SHIFT 29
/* Register number field: five bits starting at bit UREG_NR_SHIFT. */
#define UREG_NR_SHIFT 24
#define UREG_NR_MASK 0x1f /* 31 */
/* Each of the four channels owns one nibble: the source selector sits
 * at *_SHIFT and the negate flag is the top bit of that nibble.
 */
#define UREG_CHANNEL_X_NEGATE_SHIFT 23
#define UREG_CHANNEL_X_SHIFT 20
#define UREG_CHANNEL_Y_NEGATE_SHIFT 19
#define UREG_CHANNEL_Y_SHIFT 16
#define UREG_CHANNEL_Z_NEGATE_SHIFT 15
#define UREG_CHANNEL_Z_SHIFT 12
#define UREG_CHANNEL_W_NEGATE_SHIFT 11
#define UREG_CHANNEL_W_SHIFT 8
/* Two extra nibbles permanently hold the _ZERO and _ONE selectors so
 * that swizzle() can pick them like any other channel.
 * NOTE(review): the *_NEGATE_MBZ values are not used in this file;
 * presumably "must be zero" -- confirm before relying on them.
 */
#define UREG_CHANNEL_ZERO_NEGATE_MBZ 5
#define UREG_CHANNEL_ZERO_SHIFT 4
#define UREG_CHANNEL_ONE_NEGATE_MBZ 1
#define UREG_CHANNEL_ONE_SHIFT 0
#define UREG_BAD 0xffffffff /* not a valid ureg */
/* Channel selectors. */
#define _X 0
#define _Y 1
#define _Z 2
#define _W 3
#define _ZERO 4 /* NOTE! */
#define _ONE 5 /* NOTE! */
/* Construct a ureg of the given type and number with the identity
 * swizzle (x,y,z,w) and no negation.
 */
#define UREG( type, nr ) (((type) << UREG_TYPE_SHIFT) | \
                          ((nr) << UREG_NR_SHIFT) | \
                          (_X << UREG_CHANNEL_X_SHIFT) | \
                          (_Y << UREG_CHANNEL_Y_SHIFT) | \
                          (_Z << UREG_CHANNEL_Z_SHIFT) | \
                          (_W << UREG_CHANNEL_W_SHIFT) | \
                          (_ZERO << UREG_CHANNEL_ZERO_SHIFT) | \
                          (_ONE << UREG_CHANNEL_ONE_SHIFT))
/* Move the nibble of the selected channel into the X nibble position
 * and mask everything else away. Arguments are fully parenthesized so
 * that expression arguments expand correctly.
 */
#define GET_CHANNEL_SRC( reg, channel ) (((reg) << ((channel) * 4)) & \
                                         (0xf << UREG_CHANNEL_X_SHIFT))
/* Move a nibble sitting in the X position down to the given channel's
 * position.
 */
#define CHANNEL_SRC( src, channel ) ((src) >> ((channel) * 4))
#define GET_UREG_TYPE(reg) (((reg) >> UREG_TYPE_SHIFT) & UREG_TYPE_MASK)
#define GET_UREG_NR(reg) (((reg) >> UREG_NR_SHIFT) & UREG_NR_MASK)
/* Covers the four channel nibbles, negate bits included. */
#define UREG_XYZW_CHANNEL_MASK 0x00ffff00
/* Replicate a single channel of reg across all four channels. */
#define deref(reg,pos) swizzle(reg, pos, pos, pos, pos)
/* Return non-zero when the channel nibbles of reg (selectors and
 * negate bits) differ from those of a freshly constructed UREG(0,0).
 */
static __inline int is_swizzled( int reg )
{
   const int identity = UREG(0,0) & UREG_XYZW_CHANNEL_MASK;
   const int channels = reg & UREG_XYZW_CHANNEL_MASK;

   return channels != identity;
}
/* One neat thing about the UREG representation: a swizzle is just a
 * matter of moving channel nibbles around.
 *
 * Returns reg with its X, Y, Z and W nibbles replaced by the nibbles
 * currently selected by x, y, z and w (each one of _X, _Y, _Z, _W,
 * _ZERO or _ONE). Type and register number are left untouched.
 *
 * The bit manipulation is done in unsigned arithmetic: a ureg with a
 * type >= 4 has bit 31 set, and left-shifting a negative int is
 * undefined behaviour.
 */
static __inline int swizzle( int reg, int x, int y, int z, int w )
{
   const unsigned bits = (unsigned) reg;
   unsigned result = bits & ~(unsigned) UREG_XYZW_CHANNEL_MASK;

   result |= CHANNEL_SRC( GET_CHANNEL_SRC( bits, x ), 0 );
   result |= CHANNEL_SRC( GET_CHANNEL_SRC( bits, y ), 1 );
   result |= CHANNEL_SRC( GET_CHANNEL_SRC( bits, z ), 2 );
   result |= CHANNEL_SRC( GET_CHANNEL_SRC( bits, w ), 3 );

   /* Same unsigned -> int conversion the callers already rely on when
    * they pass a GLuint ureg into this function.
    */
   return (int) result;
}
/* Another neat thing about the UREG representation: negation is a
 * single XOR. For each of x, y, z, w whose low bit is set, the negate
 * flag of the corresponding channel in reg is toggled.
 */
static __inline int negate( int reg, int x, int y, int z, int w )
{
   int toggle = 0;

   toggle |= (x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT;
   toggle |= (y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT;
   toggle |= (z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT;
   toggle |= (w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT;

   return reg ^ toggle;
}
/* Map a PROGRAM_* register file to the matching UREG_TYPE_* tag.
 * Files without a mapping yield UREG_BAD.
 */
static GLuint src_reg_file( GLuint file )
{
   if (file == PROGRAM_TEMPORARY) {
      return UREG_TYPE_TEMP;
   }
   if (file == PROGRAM_INPUT) {
      return UREG_TYPE_INTERP;
   }
   if (file == PROGRAM_LOCAL_PARAM) {
      return UREG_TYPE_LOCAL_CONST;
   }
   if (file == PROGRAM_ENV_PARAM) {
      return UREG_TYPE_ENV_CONST;
   }
   if (file == PROGRAM_STATE_VAR) {
      return UREG_TYPE_STATE_CONST;
   }
   if (file == PROGRAM_NAMED_PARAM) {
      return UREG_TYPE_PARAM;
   }
   return UREG_BAD;
}
/**
 * Build the ureg for a source register: register file and index first,
 * then the source's four-component swizzle, then negation of all four
 * channels when NegateBase is set.
 */
static GLuint src_vector( const struct fp_src_register *source )
{
   GLuint ureg;

   /* Limitation of the UREG representation: only five index bits. */
   assert(source->Index < 32);

   ureg = UREG( src_reg_file( source->File ), source->Index );
   ureg = swizzle(ureg,
                  _X + source->Swizzle[0],
                  _X + source->Swizzle[1],
                  _X + source->Swizzle[2],
                  _X + source->Swizzle[3]);

   if (!source->NegateBase) {
      return ureg;
   }
   ureg = negate( ureg, 1,1,1,1 );
   return ureg;
}
/* Emit the opening of the generated C function: its signature and the
 * declaration of the temporary register array.
 *
 * `outputs` is declared as a pointer to 4-float rows because the
 * destination printer emits two subscripts ("outputs[n][c]"); a plain
 * `GLfloat *` would make the generated code fail to compile.
 */
static void print_header( void )
{
   printf("static void run_program( const GLfloat (*local_param)[4], \n"
          " const GLfloat (*env_param)[4], \n"
          " const GLfloat (*state_param)[4], \n"
          " const GLfloat (*interp)[4], \n"
          " GLfloat (*outputs)[4])\n"
          "{\n"
          " GLfloat temp[32][4];\n"
          );
}
/* Emit the closing brace of the generated C function. */
static void print_footer( void )
{
   fputs("}\n", stdout);
}
/* Print the destination register of inst as "outputs[n]" or "temp[n]".
 * Nothing is printed for any other destination file.
 */
static void print_dest_reg( const struct fp_instruction *inst )
{
   if (inst->DstReg.File == PROGRAM_OUTPUT) {
      printf("outputs[%d]", inst->DstReg.Index);
   }
   else if (inst->DstReg.File == PROGRAM_TEMPORARY) {
      printf("temp[%d]", inst->DstReg.Index);
   }
}
/* Print one component of the destination register of inst: the
 * register itself followed by the component subscript idx.
 */
static void print_dest( const struct fp_instruction *inst,
                        GLuint idx )
{
   print_dest_reg( inst );
   printf( "[%d]", idx );
}
/* Extract the source selector stored in the X channel of reg (the
 * negate flag in the bit above it is masked off).
 */
#define UREG_SRC0(reg) (((reg)>>UREG_CHANNEL_X_SHIFT) & 0x7)
/* Print the array expression naming the register encoded in arg, e.g.
 * "temp[3]", without any component subscript.
 *
 * The array names match the parameters declared by print_header()
 * (local_param, env_param, state_param, interp) and its temp array, so
 * that the generated code refers to identifiers that actually exist.
 * NOTE(review): print_header() declares nothing called "param"; named
 * parameters still need a declaration in the generated function.
 */
static void print_reg( GLuint arg )
{
   switch (GET_UREG_TYPE(arg)) {
   case UREG_TYPE_TEMP:        printf("temp"); break;
   case UREG_TYPE_INTERP:      printf("interp"); break;
   case UREG_TYPE_LOCAL_CONST: printf("local_param"); break;
   case UREG_TYPE_ENV_CONST:   printf("env_param"); break;
   case UREG_TYPE_STATE_CONST: printf("state_param"); break;
   case UREG_TYPE_PARAM:       printf("param"); break;
   default:
      /* Unknown type: only the index is printed, as before. */
      break;
   }
   printf("[%d]", GET_UREG_NR(arg));
}
/* Print the scalar selected by the X channel of arg.
 *
 * _ZERO prints "0" (no sign); otherwise a "-" is printed first when the
 * X negate flag is set. _ONE then prints "1". State constants are
 * printed as the literal value read from the program's parameter list;
 * everything else is printed as a register plus a component subscript.
 */
static void print_arg( const struct fragment_program *p,
                       GLuint arg )
{
   static const char *const subscript[4] = { "[0]", "[1]", "[2]", "[3]" };
   const GLuint chan = UREG_SRC0(arg);

   if (chan == _ZERO) {
      printf("0");
      return;
   }

   if (arg & (1<<UREG_CHANNEL_X_NEGATE_SHIFT)) {
      printf("-");
   }

   if (chan == _ONE) {
      printf("1");
      return;
   }

   if (GET_UREG_TYPE(arg) == UREG_TYPE_STATE_CONST) {
      printf("%g", p->Parameters->Parameters[GET_UREG_NR(arg)].Values[chan]);
      return;
   }

   print_reg( arg );

   /* Selectors above _W get no subscript. */
   if (chan <= _W) {
      fputs(subscript[chan], stdout);
   }
}
/* This is where the handling of expressions breaks down into string
 * processing.
 *
 * Copies fmt to stdout, replacing every "%s" with the next ureg taken
 * from ap (read as an int), printed as the scalar found in channel i of
 * that ureg. The output is terminated with ";" and a newline.
 */
static void print_expression( const struct fragment_program *p,
                              GLuint i,
                              const char *fmt,
                              va_list ap )
{
   const char *cursor = fmt;

   while (*cursor != '\0') {
      if (cursor[0] == '%' && cursor[1] == 's') {
         const int ureg = va_arg(ap, int);

         /* Using deref() to select channel i is a bit of a hack. */
         print_arg( p, deref(ureg, i) );
         cursor += 2;
         continue;
      }
      putchar(*cursor);
      cursor++;
   }

   printf(";\n");
}
/* Emit a texture call that reads its coordinate straight from the
 * source register and writes straight into the destination register:
 * "fn( ctx, <src>, <texunit>, <dst>);". The program p is not consulted.
 */
static void do_tex_simple( const struct fragment_program *p,
                           const struct fp_instruction *inst,
                           const char *fn, GLuint texunit, GLuint arg )
{
   printf( " %s( ctx, ", fn );
   print_reg( arg );
   printf( ", %d, ", texunit );
   print_dest_reg( inst );
   printf( ");\n" );
}
/* Emit a texture lookup.
 *
 * When every destination component is written and the source carries
 * no swizzle or negation, the short form of do_tex_simple() is used.
 * Otherwise a block is emitted that gathers the coordinate into a local
 * texcoord[], samples into a local result[], and copies only the
 * write-enabled components to the destination.
 */
static void do_tex( const struct fragment_program *p,
                    const struct fp_instruction *inst,
                    const char *fn, GLuint texunit, GLuint arg )
{
   GLuint chan;
   GLboolean masked_write = GL_FALSE;

   for (chan = 0; chan < 4; chan++) {
      if (!inst->DstReg.WriteMask[chan]) {
         masked_write = GL_TRUE;
      }
   }

   if (!masked_write && !is_swizzled(arg)) {
      do_tex_simple( p, inst, fn, texunit, arg );
      return;
   }

   printf(" {\n");
   printf(" GLfloat texcoord[4];\n");
   printf(" GLfloat result[4];\n");

   for (chan = 0; chan < 4; chan++) {
      printf(" texcoord[%d] = ", chan);
      print_arg( p, deref(arg, chan) );
      printf(";\n");
   }

   printf(" %s( ctx, texcoord, %d, result);\n", fn, texunit );

   for (chan = 0; chan < 4; chan++) {
      if (!inst->DstReg.WriteMask[chan]) {
         continue;
      }
      printf(" ");
      print_dest(inst, chan);
      printf(" = result[%d];\n", chan);
   }

   printf(" }\n");
}
/* Emit an assignment to component i of the destination of inst, if
 * that component is write-enabled. The right-hand side is fmt with
 * each "%s" replaced by channel i of the next ureg in the variable
 * argument list.
 */
static void assign_single( GLuint i,
                           const struct fragment_program *p,
                           const struct fp_instruction *inst,
                           const char *fmt,
                           ... )
{
   va_list args;

   if (!inst->DstReg.WriteMask[i]) {
      return;
   }

   printf(" ");
   print_dest(inst, i);
   printf(" = ");

   va_start( args, fmt );
   print_expression( p, i, fmt, args );
   va_end( args );
}
/* Emit one assignment per write-enabled destination component of inst.
 * For component i the right-hand side is fmt with each "%s" replaced by
 * channel i of the corresponding ureg argument.
 *
 * The argument list is restarted for every component: once
 * print_expression() has called va_arg on a va_list, its value in the
 * caller is indeterminate, so reusing one va_list across iterations
 * only works by accident on ABIs where va_list is a plain pointer.
 */
static void assign4( const struct fragment_program *p,
                     const struct fp_instruction *inst,
                     const char *fmt,
                     ... )
{
   GLuint i;

   for (i = 0; i < 4; i++) {
      va_list ap;

      if (!inst->DstReg.WriteMask[i]) {
         continue;
      }

      printf(" ");
      print_dest(inst, i);
      printf(" = ");

      va_start( ap, fmt );
      print_expression( p, i, fmt, ap );
      va_end( ap );
   }
}
/* Emit a single chained assignment "a = b = ... = expr;" that stores
 * one scalar expression into every write-enabled destination component
 * of inst. The expression is fmt with each "%s" replaced by channel 0
 * of the corresponding ureg argument. Nothing is emitted when no
 * component is write-enabled.
 */
static void assign4_replicate( const struct fragment_program *p,
                               const struct fp_instruction *inst,
                               const char *fmt,
                               ... )
{
   GLuint chan;
   GLuint enabled = 0;
   va_list args;

   for (chan = 0; chan < 4; chan++) {
      if (inst->DstReg.WriteMask[chan]) {
         enabled++;
      }
   }
   if (enabled == 0) {
      return;
   }

   printf(" ");
   for (chan = 0; chan < 4; chan++) {
      if (!inst->DstReg.WriteMask[chan]) {
         continue;
      }
      print_dest(inst, chan);
      printf(" = ");
   }

   va_start( args, fmt );
   print_expression( p, 0, fmt, args );
   va_end( args );
}
/* Number of source operands read for the given fragment program
 * opcode; 0 for any opcode not listed.
 */
static GLuint nr_args( GLuint opcode )
{
   switch (opcode) {
   /* One source operand. */
   case FP_OPCODE_ABS:
   case FP_OPCODE_COS:
   case FP_OPCODE_EX2:
   case FP_OPCODE_FLR:
   case FP_OPCODE_FRC:
   case FP_OPCODE_KIL:
   case FP_OPCODE_LG2:
   case FP_OPCODE_LIT:
   case FP_OPCODE_MOV:
   case FP_OPCODE_RCP:
   case FP_OPCODE_RSQ:
   case FP_OPCODE_SCS:
   case FP_OPCODE_SIN:
   case FP_OPCODE_SWZ:
   case FP_OPCODE_TEX:
   case FP_OPCODE_TXB:
   case FP_OPCODE_TXP:
      return 1;

   /* Two source operands. */
   case FP_OPCODE_ADD:
   case FP_OPCODE_DP3:
   case FP_OPCODE_DP4:
   case FP_OPCODE_DPH:
   case FP_OPCODE_DST:
   case FP_OPCODE_MAX:
   case FP_OPCODE_MIN:
   case FP_OPCODE_MUL:
   case FP_OPCODE_POW:
   case FP_OPCODE_SGE:
   case FP_OPCODE_SLT:
   case FP_OPCODE_SUB:
   case FP_OPCODE_XPD:
      return 2;

   /* Three source operands. */
   case FP_OPCODE_CMP:
   case FP_OPCODE_LRP:
   case FP_OPCODE_MAD:
      return 3;

   default:
      return 0;
   }
}
static void upload_program( const struct fragment_program *p )
{
const struct fp_instruction *inst = p->Instructions;
for (; inst->Opcode != FP_OPCODE_END; inst++) {
GLuint src[3], i;
GLuint nr = nr_args( inst->Opcode );
for (i = 0; i < nr; i++)
src[i] = src_vector( &inst->SrcReg[i] );
switch (inst->Opcode) {
case FP_OPCODE_ABS:
assign4(p, inst, "FABSF(%s)", src[0]);
break;
case FP_OPCODE_ADD:
assign4(p, inst, "%s + %s", src[0], src[1]);
break;
case FP_OPCODE_CMP:
assign4(p, inst, "%s < 0.0F ? %s : %s", src[0], src[1], src[2]);
break;
case FP_OPCODE_COS:
assign4_replicate(p, inst, "COS(%s)", src[0]);
break;
case FP_OPCODE_DP3:
assign4_replicate(p, inst,
"%s*%s + %s*%s + %s*%s",
deref(src[0],_X),
deref(src[1],_X),
deref(src[0],_Y),
deref(src[1],_Y),
deref(src[0],_Z),
deref(src[1],_Z));
break;
case FP_OPCODE_DP4:
assign4_replicate(p, inst,
"%s*%s + %s*%s + %s*%s + %s*%s",
deref(src[0],_X),
deref(src[1],_X),
deref(src[0],_Y),
deref(src[1],_Y),
deref(src[0],_Z),
deref(src[1],_Z));
break;
case FP_OPCODE_DPH:
assign4_replicate(p, inst,
"%s*%s + %s*%s + %s*%s + %s",
deref(src[0],_X),
deref(src[1],_X),
deref(src[0],_Y),
deref(src[1],_Y),
deref(src[1],_Z));
break;
case FP_OPCODE_DST:
/* result[0] = 1 * 1;
* result[1] = a[1] * b[1];
* result[2] = a[2] * 1;
* result[3] = 1 * b[3];
*
* Here we hope that the compiler can optimize away "x*1" to "x".
*/
assign4(p, inst,
"%s*%s",
swizzle(src[0], _ONE, _Y, _Z, _ONE),
swizzle(src[1], _ONE, _Y, _ONE, _W ));
break;
case FP_OPCODE_EX2:
assign4_replicate(p, inst, "EX2(%s)", src[0]);
break;
case FP_OPCODE_FLR:
assign4_replicate(p, inst, "FLR(%s)", src[0]);
break;
case FP_OPCODE_FRC:
assign4_replicate(p, inst, "FRC(%s)", src[0]);
break;
case FP_OPCODE_KIL:
/* TODO */
break;
case FP_OPCODE_LG2:
assign4_replicate(p, inst, "LOG(%s)", deref(src[0], _X));
break;
case FP_OPCODE_LIT:
assign_single(0, p, inst, "1.0");
assign_single(1, p, inst, "MIN2(%s, 0)", deref(src[0], _X));
assign_single(2, p, inst, "(%s > 0.0) ? EXP(%s * MIN2(%s, 0)) : 0.0",
deref(src[0], _X),
deref(src[0], _Z),
deref(src[0], _Y));
assign_single(3, p, inst, "1.0");
break;
case FP_OPCODE_LRP:
assign4(p, inst,
"%s * %s + (1.0 - %s) * %s",
src[0], src[1], src[0], src[2]);
break;
case FP_OPCODE_MAD:
assign4(p, inst, "%s * %s + %s", src[0], src[1], src[2]);
break;
case FP_OPCODE_MAX:
assign4(p, inst, "MAX2(%s, %s)", src[0], src[1]);
break;
case FP_OPCODE_MIN:
assign4(p, inst, "MIN2(%s, %s)", src[0], src[1]);
break;
case FP_OPCODE_MOV:
assign4(p, inst, "%s", src[0]);
break;
case FP_OPCODE_MUL:
assign4(p, inst, "%s * %s", src[0], src[1]);
break;
case FP_OPCODE_POW:
assign4_replicate(p, inst, "POW(%s, %s)",
deref(src[0], _X),
deref(src[1], _X));
break;
case FP_OPCODE_RCP:
assign4_replicate(p, inst, "1.0/%s", deref(src[0], _X));
break;
case FP_OPCODE_RSQ:
assign4_replicate(p, inst, "INV_SQRTF(%s)", deref(src[0], _X));
break;
case FP_OPCODE_SCS:
if (inst->DstReg.WriteMask[0]) {
assign_single(0, p, inst, "COS(%s)", deref(src[0], _X));
}
if (inst->DstReg.WriteMask[1]) {
assign_single(1, p, inst, "SIN(%s)", deref(src[0], _X));
}
break;
case FP_OPCODE_SGE:
assign4(p, inst, "%s >= %s ? 1.0 : 0.0", src[0], src[1]);
break;
case FP_OPCODE_SIN:
assign4_replicate(p, inst, "SIN(%s)", deref(src[0], _X));
break;
case FP_OPCODE_SLT:
assign4(p, inst, "%s < %s ? 1.0 : 0.0", src[0], src[1]);
break;
case FP_OPCODE_SUB:
assign4(p, inst, "%s - %s", src[0], src[1]);
break;
case FP_OPCODE_SWZ: /* same implementation as MOV: */
assign4(p, inst, "%s", src[0]);
break;
case FP_OPCODE_TEX:
do_tex(p, inst, "TEX", inst->TexSrcUnit, src[0]);
break;
case FP_OPCODE_TXB:
do_tex(p, inst, "TXB", inst->TexSrcUnit, src[0]);
break;
case FP_OPCODE_TXP:
do_tex(p, inst, "TXP", inst->TexSrcUnit, src[0]);
break;
case FP_OPCODE_X2D:
/* Cross product:
* result.x = src[0].y * src[1].z - src[0].z * src[1].y;
* result.y = src[0].z * src[1].x - src[0].x * src[1].z;
* result.z = src[0].x * src[1].y - src[0].y * src[1].x;
* result.w = undef;
*/
assign4(p, inst,
"%s * %s - %s * %s",
swizzle(src[0], _Y, _Z, _X, _ONE),
swizzle(src[1], _Z, _X, _Y, _ONE),
swizzle(src[0], _Z, _X, _Y, _ONE),
swizzle(src[1], _Y, _Z, _X, _ONE));
break;
default:
return;
}
}
}
/* Print the current fragment program of ctx to stdout as a C function:
 * header, translated instructions, footer. Does nothing when no
 * fragment program is current.
 */
void _swrast_translate_program( GLcontext *ctx )
{
   if (!ctx->FragmentProgram.Current) {
      return;
   }

   print_header();
   upload_program( ctx->FragmentProgram.Current );
   print_footer();
}