r300/compiler: Add support for inline literals

On R500 chips, shader instructions can take 7-bit (3-bit mantissa, 4-bit
exponent) floating point values as inputs in place of registers.
This commit is contained in:
Tom Stellard 2012-01-14 08:08:33 -05:00 committed by Tom Stellard
parent 95594bae47
commit befcce264c
12 changed files with 192 additions and 9 deletions

View file

@ -28,6 +28,7 @@ C_SOURCES := \
compiler/radeon_compiler_util.c \
compiler/radeon_emulate_branches.c \
compiler/radeon_emulate_loops.c \
compiler/radeon_inline_literals.c \
compiler/radeon_program.c \
compiler/radeon_program_print.c \
compiler/radeon_opcodes.c \

View file

@ -125,6 +125,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
{"emulate loops", 1, !is_r500, rc_emulate_loops, NULL},
{"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL},
{"dataflow optimize", 1, opt, rc_optimize, NULL},
{"inline literals", 1, is_r500 && opt, rc_inline_literals, NULL},
{"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
{"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
{"pair translate", 1, 1, rc_pair_translate, NULL},

View file

@ -218,6 +218,8 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
return 1;
return 0;
} else if (reg.File == RC_FILE_INLINE) {
return 1;
} else {
/* ALU instructions support almost everything */
relevant = 0;

View file

@ -210,6 +210,8 @@ static unsigned int use_source(struct r500_fragment_program_code* code, struct r
} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
use_temporary(code, src.Index);
return src.Index;
} else if (src.File == RC_FILE_INLINE) {
return src.Index | (1 << 7);
}
return 0;

View file

@ -357,21 +357,22 @@ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
static void reg_count_callback(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
int *max_reg = userdata;
struct rc_program_stats *s = userdata;
if (file == RC_FILE_TEMPORARY)
(int)index > *max_reg ? *max_reg = index : 0;
(int)index > s->num_temp_regs ? s->num_temp_regs = index : 0;
if (file == RC_FILE_INLINE)
s->num_inline_literals++;
}
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
{
int max_reg = -1;
struct rc_instruction * tmp;
memset(s, 0, sizeof(*s));
for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
tmp = tmp->Next){
const struct rc_opcode_info * info;
rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg);
rc_for_all_reads_mask(tmp, reg_count_callback, s);
if (tmp->Type == RC_INSTRUCTION_NORMAL) {
info = rc_get_opcode_info(tmp->U.I.Opcode);
if (info->Opcode == RC_OPCODE_BEGIN_TEX)
@ -405,7 +406,9 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
s->num_tex_insts++;
s->num_insts++;
}
s->num_temp_regs = max_reg + 1;
/* Increment here because the reg_count_callback stores the max
* temporary reg index in s->num_temp_regs. */
s->num_temp_regs++;
}
static void print_stats(struct radeon_compiler * c)
@ -437,10 +440,11 @@ static void print_stats(struct radeon_compiler * c)
"~%4u Presub Operations\n"
"~%4u OMOD Operations\n"
"~%4u Temporary Registers\n"
"~%4u Inline Literals\n"
"~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
s.num_omod_ops, s.num_temp_regs);
s.num_omod_ops, s.num_temp_regs, s.num_inline_literals);
break;
default:
assert(0);

View file

@ -161,6 +161,7 @@ struct rc_program_stats {
unsigned num_presub_ops;
unsigned num_temp_regs;
unsigned num_omod_ops;
unsigned num_inline_literals;
};
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s);

View file

@ -130,5 +130,6 @@ void rc_dataflow_swizzles(struct radeon_compiler * c, void *user);
/*@}*/
void rc_optimize(struct radeon_compiler * c, void *user);
void rc_inline_literals(struct radeon_compiler *c, void *user);
#endif /* RADEON_DATAFLOW_H */

View file

@ -0,0 +1,140 @@
#include "radeon_compiler.h"
#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
#include "radeon_program.h"
#include "radeon_program_constants.h"
#include <stdio.h>
#define VERBOSE 0

#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)

/**
 * Try to encode a single-precision float as a 7-bit inline literal.
 *
 * IEEE-754:
 * 22:0 mantissa
 * 30:23 exponent (bias 127)
 * 31 sign
 *
 * R300:
 * 0:2 mantissa
 * 3:6 exponent (bias 7)
 *
 * The 7-bit format has no sign bit, so the sign of \p f is reported through
 * the return value instead of being stored in the encoding.
 *
 * \param f               value to convert
 * \param r300_float_out  receives the 7-bit encoding on success; it is left
 *                        untouched when the conversion fails
 * \return 0 if \p f cannot be represented exactly, 1 if it can and its sign
 *         bit is clear, -1 if it can and its sign bit is set
 */
static int ieee_754_to_r300_float(float f, unsigned char *r300_float_out)
{
	/* Read the bit pattern through a union; dereferencing the float
	 * through an unsigned pointer breaks the strict aliasing rules. */
	union {
		float as_float;
		unsigned as_bits;
	} pun;
	unsigned float_bits;
	/* XXX: Handle big-endian */
	unsigned mantissa;
	unsigned biased_exponent;
	unsigned negate;
	int exponent;
	/* Every mantissa bit below the top three must be zero. */
	unsigned mantissa_mask = 0xff8fffff;
	unsigned r300_exponent, r300_mantissa;

	pun.as_float = f;
	float_bits = pun.as_bits;

	mantissa = float_bits & 0x007fffff;
	biased_exponent = (float_bits & 0x7f800000) >> 23;
	negate = !!(float_bits & 0x80000000);
	exponent = (int)biased_exponent - 127;

	DBG("Converting %f (0x%x) to 7-bit:\n", f, float_bits);
	DBG("Raw exponent = %d\n", exponent);

	/* A 4-bit exponent with bias 7 covers -7..8.  Zero, denormals,
	 * infinities and NaNs all fall outside of that range. */
	if (exponent < -7 || exponent > 8) {
		DBG("Failed exponent out of range\n\n");
		return 0;
	}

	if (mantissa & mantissa_mask) {
		DBG("Failed mantisa has too many bits:\n"
			"manitssa=0x%x mantissa_mask=0x%x, and=0x%x\n\n",
			mantissa, mantissa_mask,
			mantissa & mantissa_mask);
		return 0;
	}

	r300_exponent = exponent + 7;
	r300_mantissa = (mantissa & ~mantissa_mask) >> 20;
	*r300_float_out = r300_mantissa | (r300_exponent << 3);

	DBG("Success! r300_float = 0x%x\n\n", *r300_float_out);

	if (negate)
		return -1;
	else
		return 1;
}
/**
 * Compiler pass: rewrite immediate-constant sources as inline literals.
 *
 * For every source operand that reads an RC_CONSTANT_IMMEDIATE constant, the
 * value selected by each used swizzle channel is run through
 * ieee_754_to_r300_float().  The operand is converted to RC_FILE_INLINE only
 * if every used channel converts successfully and all of them produce the
 * same 7-bit encoding; otherwise it is left untouched.
 *
 * \param c     compiler whose program is rewritten in place
 * \param user  not used by this pass
 */
void rc_inline_literals(struct radeon_compiler *c, void *user)
{
	struct rc_instruction *cur;

	for (cur = c->Program.Instructions.Next;
	     cur != &c->Program.Instructions;
	     cur = cur->Next) {
		const struct rc_opcode_info *opinfo =
			rc_get_opcode_info(cur->U.I.Opcode);
		unsigned arg;

		/* XXX: Handle presub */

		/* rc_for_all_reads_src is deliberately not used here, because
		 * presub sources need to be handled differently. */
		for (arg = 0; arg < opinfo->NumSrcRegs; arg++) {
			struct rc_src_register *src = &cur->U.I.SrcReg[arg];
			struct rc_constant *imm;
			unsigned char literal = 0;
			unsigned literal_swizzle;
			unsigned flipped_channels = 0;
			unsigned have_literal = 0;
			unsigned rejected = 0;
			unsigned ch;

			if (src->File != RC_FILE_CONSTANT)
				continue;

			imm = &c->Program.Constants.Constants[src->Index];
			if (imm->Type != RC_CONSTANT_IMMEDIATE)
				continue;

			literal_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);

			for (ch = 0; ch < 4; ch++) {
				unsigned char encoded;
				unsigned sel = GET_SWZ(src->Swizzle, ch);
				int sign;

				if (sel == RC_SWIZZLE_UNUSED)
					continue;

				/* NOTE(review): sel indexes Immediate[]
				 * directly; presumably only component
				 * selectors reach this point - confirm. */
				sign = ieee_754_to_r300_float(
						imm->u.Immediate[sel], &encoded);

				/* Not representable in 7 bits. */
				if (sign == 0) {
					rejected = 1;
					break;
				}

				/* A source can only carry one literal, so all
				 * used channels must share one encoding. */
				if (have_literal && literal != encoded) {
					rejected = 1;
					break;
				}

				/* A negative value is expressed through the
				 * negate bits; give up when the source also
				 * has Abs set. */
				if (sign < 0 && src->Abs) {
					rejected = 1;
					break;
				}

				literal = encoded;
				have_literal = 1;

				/* Use RC_SWIZZLE_W for the inline constant, so
				 * it will become one of the alpha sources. */
				SET_SWZ(literal_swizzle, ch, RC_SWIZZLE_W);

				if (sign < 0)
					flipped_channels |= (1 << ch);
			}

			if (rejected || !have_literal)
				continue;

			src->File = RC_FILE_INLINE;
			src->Index = literal;
			src->Swizzle = literal_swizzle;
			src->Negate ^= flipped_channels;
		}
	}
}

View file

@ -268,7 +268,15 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
pair->Alpha.Arg[i].Source = source;
pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
if (istranscendent) {
pair->Alpha.Arg[i].Negate =
!!(inst->SrcReg[i].Negate &
inst->DstReg.WriteMask);
} else {
pair->Alpha.Arg[i].Negate =
!!(inst->SrcReg[i].Negate & RC_MASK_W);
}
}
}

View file

@ -85,7 +85,12 @@ typedef enum {
* Indicates this register should use the result of the presubtract
* operation.
*/
RC_FILE_PRESUB
RC_FILE_PRESUB,
/**
* Indicates that the source index has been encoded as a 7-bit float.
*/
RC_FILE_INLINE
} rc_register_file;
enum {

View file

@ -57,7 +57,7 @@ struct radeon_compiler;
struct rc_pair_instruction_source {
unsigned int Used:1;
unsigned int File:3;
unsigned int File:4;
unsigned int Index:RC_REGISTER_INDEX_BITS;
};

View file

@ -109,6 +109,22 @@ static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func fun
}
}
/**
 * Print a 7-bit inline literal as "<decimal value> (0x<raw encoding>)".
 *
 * The encoding holds a 3-bit mantissa in bits 0:2 and a 4-bit exponent with
 * bias 7 in bits 3:6.  It is expanded back into a single-precision bit
 * pattern so that it can be printed with %f.
 *
 * \param f      stream to print to
 * \param index  7-bit encoded literal, as stored in the register index
 */
static void rc_print_inline_float(FILE * f, int index)
{
	/* Convert bits to float through a union; reading an unsigned object
	 * through a float pointer breaks the strict aliasing rules. */
	union {
		unsigned as_bits;
		float as_float;
	} pun;
	int r300_exponent = (index >> 3) & 0xf;
	unsigned r300_mantissa = index & 0x7;
	unsigned float_exponent;

	/* Rebias the exponent from 7 to the IEEE-754 bias of 127. */
	r300_exponent -= 7;
	float_exponent = r300_exponent + 127;

	/* The three mantissa bits become the top bits of the 23-bit field. */
	pun.as_bits = (r300_mantissa << 20) | (float_exponent << 23);

	fprintf(f, "%f (0x%x)", pun.as_float, (unsigned)index);
}
static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
{
if (file == RC_FILE_NONE) {
@ -118,6 +134,8 @@ static void rc_print_register(FILE * f, rc_register_file file, int index, unsign
case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break;
default: fprintf(f, "special[%i]", index); break;
}
} else if (file == RC_FILE_INLINE) {
rc_print_inline_float(f, index);
} else {
const char * filename;
switch(file) {