mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 17:30:12 +01:00
r300/compiler: Add support for inline literals
On R500 chips, shader instructions can take 7-bit (3-bit mantissa, 4-bit exponent) floating point values as inputs in place of registers.
This commit is contained in:
parent
95594bae47
commit
befcce264c
12 changed files with 192 additions and 9 deletions
|
|
@ -28,6 +28,7 @@ C_SOURCES := \
|
|||
compiler/radeon_compiler_util.c \
|
||||
compiler/radeon_emulate_branches.c \
|
||||
compiler/radeon_emulate_loops.c \
|
||||
compiler/radeon_inline_literals.c \
|
||||
compiler/radeon_program.c \
|
||||
compiler/radeon_program_print.c \
|
||||
compiler/radeon_opcodes.c \
|
||||
|
|
|
|||
|
|
@ -125,6 +125,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
|
|||
{"emulate loops", 1, !is_r500, rc_emulate_loops, NULL},
|
||||
{"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL},
|
||||
{"dataflow optimize", 1, opt, rc_optimize, NULL},
|
||||
{"inline literals", 1, is_r500 && opt, rc_inline_literals, NULL},
|
||||
{"dataflow swizzles", 1, 1, rc_dataflow_swizzles, NULL},
|
||||
{"dead constants", 1, 1, rc_remove_unused_constants, &c->code->constants_remap_table},
|
||||
{"pair translate", 1, 1, rc_pair_translate, NULL},
|
||||
|
|
|
|||
|
|
@ -218,6 +218,8 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
|
|||
return 1;
|
||||
|
||||
return 0;
|
||||
} else if (reg.File == RC_FILE_INLINE) {
|
||||
return 1;
|
||||
} else {
|
||||
/* ALU instructions support almost everything */
|
||||
relevant = 0;
|
||||
|
|
|
|||
|
|
@ -210,6 +210,8 @@ static unsigned int use_source(struct r500_fragment_program_code* code, struct r
|
|||
} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
|
||||
use_temporary(code, src.Index);
|
||||
return src.Index;
|
||||
} else if (src.File == RC_FILE_INLINE) {
|
||||
return src.Index | (1 << 7);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -357,21 +357,22 @@ void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
|
|||
static void reg_count_callback(void * userdata, struct rc_instruction * inst,
|
||||
rc_register_file file, unsigned int index, unsigned int mask)
|
||||
{
|
||||
int *max_reg = userdata;
|
||||
struct rc_program_stats *s = userdata;
|
||||
if (file == RC_FILE_TEMPORARY)
|
||||
(int)index > *max_reg ? *max_reg = index : 0;
|
||||
(int)index > s->num_temp_regs ? s->num_temp_regs = index : 0;
|
||||
if (file == RC_FILE_INLINE)
|
||||
s->num_inline_literals++;
|
||||
}
|
||||
|
||||
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
|
||||
{
|
||||
int max_reg = -1;
|
||||
struct rc_instruction * tmp;
|
||||
memset(s, 0, sizeof(*s));
|
||||
|
||||
for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
|
||||
tmp = tmp->Next){
|
||||
const struct rc_opcode_info * info;
|
||||
rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg);
|
||||
rc_for_all_reads_mask(tmp, reg_count_callback, s);
|
||||
if (tmp->Type == RC_INSTRUCTION_NORMAL) {
|
||||
info = rc_get_opcode_info(tmp->U.I.Opcode);
|
||||
if (info->Opcode == RC_OPCODE_BEGIN_TEX)
|
||||
|
|
@ -405,7 +406,9 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
|
|||
s->num_tex_insts++;
|
||||
s->num_insts++;
|
||||
}
|
||||
s->num_temp_regs = max_reg + 1;
|
||||
/* Increment here because reg_count_callback stores the max
|
||||
* temporary reg index in s->num_temp_regs. */
|
||||
s->num_temp_regs++;
|
||||
}
|
||||
|
||||
static void print_stats(struct radeon_compiler * c)
|
||||
|
|
@ -437,10 +440,11 @@ static void print_stats(struct radeon_compiler * c)
|
|||
"~%4u Presub Operations\n"
|
||||
"~%4u OMOD Operations\n"
|
||||
"~%4u Temporary Registers\n"
|
||||
"~%4u Inline Literals\n"
|
||||
"~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n",
|
||||
s.num_insts, s.num_rgb_insts, s.num_alpha_insts,
|
||||
s.num_fc_insts, s.num_tex_insts, s.num_presub_ops,
|
||||
s.num_omod_ops, s.num_temp_regs);
|
||||
s.num_omod_ops, s.num_temp_regs, s.num_inline_literals);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
|
|
|
|||
|
|
@ -161,6 +161,7 @@ struct rc_program_stats {
|
|||
unsigned num_presub_ops;
|
||||
unsigned num_temp_regs;
|
||||
unsigned num_omod_ops;
|
||||
unsigned num_inline_literals;
|
||||
};
|
||||
|
||||
void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s);
|
||||
|
|
|
|||
|
|
@ -130,5 +130,6 @@ void rc_dataflow_swizzles(struct radeon_compiler * c, void *user);
|
|||
/*@}*/
|
||||
|
||||
void rc_optimize(struct radeon_compiler * c, void *user);
|
||||
void rc_inline_literals(struct radeon_compiler *c, void *user);
|
||||
|
||||
#endif /* RADEON_DATAFLOW_H */
|
||||
|
|
|
|||
140
src/gallium/drivers/r300/compiler/radeon_inline_literals.c
Normal file
140
src/gallium/drivers/r300/compiler/radeon_inline_literals.c
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
|
||||
#include "radeon_compiler.h"
|
||||
#include "radeon_compiler_util.h"
|
||||
#include "radeon_dataflow.h"
|
||||
#include "radeon_program.h"
|
||||
#include "radeon_program_constants.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#define VERBOSE 0

#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)

/* IEEE-754 single precision:
 * 22:0 mantissa
 * 30:23 exponent (bias 127)
 * 31 sign
 *
 * R300 7-bit inline float:
 * 2:0 mantissa
 * 6:3 exponent (bias 7)
 */

/**
 * Try to encode an IEEE-754 single precision value as a 7-bit inline float.
 *
 * The 7-bit format has no sign bit, so only the magnitude is encoded; the
 * sign is reported through the return value and must be applied by the
 * caller.
 *
 * \param f the value to convert.
 * \param r300_float_out receives the 7-bit encoding.  It is written only
 * when the conversion succeeds.
 * \return 0 if \p f cannot be represented exactly (unbiased exponent outside
 * [-7, 8], or mantissa bits below the top three are set), 1 if the encoded
 * value is positive, -1 if it is negative.
 */
static int ieee_754_to_r300_float(float f, unsigned char *r300_float_out)
{
	/* Read the bit pattern through a union.  Dereferencing a pointer
	 * cast from float* to unsigned* violates the strict aliasing rules
	 * and is undefined behaviour. */
	union {
		float f;
		unsigned u;
	} bits;
	unsigned float_bits;
	/* XXX: Handle big-endian */
	unsigned mantissa;
	unsigned biased_exponent;
	unsigned negate;
	int exponent;
	/* Only bits 22:20 of the mantissa survive in the 7-bit format. */
	const unsigned mantissa_mask = 0xff8fffff;
	unsigned r300_exponent, r300_mantissa;

	bits.f = f;
	float_bits = bits.u;

	mantissa = float_bits & 0x007fffff;
	biased_exponent = (float_bits & 0x7f800000) >> 23;
	negate = !!(float_bits & 0x80000000);
	exponent = biased_exponent - 127;

	DBG("Converting %f (0x%x) to 7-bit:\n", f, float_bits);
	DBG("Raw exponent = %d\n", exponent);

	/* A 4-bit exponent with bias 7 covers -7..8.  This also rejects
	 * zero, denormals, infinities and NaNs, whose unbiased exponents
	 * are -127 or 128. */
	if (exponent < -7 || exponent > 8) {
		DBG("Failed exponent out of range\n\n");
		return 0;
	}

	/* Any set bit below the top three mantissa bits would be lost. */
	if (mantissa & mantissa_mask) {
		DBG("Failed mantisa has too many bits:\n"
			"manitssa=0x%x mantissa_mask=0x%x, and=0x%x\n\n",
			mantissa, mantissa_mask,
			mantissa & mantissa_mask);
		return 0;
	}

	r300_exponent = exponent + 7;
	r300_mantissa = (mantissa & ~mantissa_mask) >> 20;
	*r300_float_out = r300_mantissa | (r300_exponent << 3);

	DBG("Success! r300_float = 0x%x\n\n", *r300_float_out);

	if (negate)
		return -1;
	else
		return 1;
}
|
||||
|
||||
void rc_inline_literals(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
struct rc_instruction * inst;
|
||||
|
||||
for(inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
const struct rc_opcode_info * info =
|
||||
rc_get_opcode_info(inst->U.I.Opcode);
|
||||
|
||||
unsigned src_idx;
|
||||
struct rc_constant * constant;
|
||||
float float_value;
|
||||
unsigned char r300_float;
|
||||
int ret;
|
||||
|
||||
/* XXX: Handle presub */
|
||||
|
||||
/* We aren't using rc_for_all_reads_src here, because presub
|
||||
* sources need to be handled differently. */
|
||||
for (src_idx = 0; src_idx < info->NumSrcRegs; src_idx++) {
|
||||
unsigned new_swizzle;
|
||||
unsigned use_literal = 0;
|
||||
unsigned negate_mask = 0;
|
||||
unsigned swz, chan;
|
||||
struct rc_src_register * src_reg =
|
||||
&inst->U.I.SrcReg[src_idx];
|
||||
swz = RC_SWIZZLE_UNUSED;
|
||||
if (src_reg->File != RC_FILE_CONSTANT) {
|
||||
continue;
|
||||
}
|
||||
constant =
|
||||
&c->Program.Constants.Constants[src_reg->Index];
|
||||
if (constant->Type != RC_CONSTANT_IMMEDIATE) {
|
||||
continue;
|
||||
}
|
||||
new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
unsigned char r300_float_tmp;
|
||||
swz = GET_SWZ(src_reg->Swizzle, chan);
|
||||
if (swz == RC_SWIZZLE_UNUSED) {
|
||||
continue;
|
||||
}
|
||||
float_value = constant->u.Immediate[swz];
|
||||
ret = ieee_754_to_r300_float(float_value,
|
||||
&r300_float_tmp);
|
||||
if (!ret || (use_literal &&
|
||||
r300_float != r300_float_tmp)) {
|
||||
use_literal = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ret == -1 && src_reg->Abs) {
|
||||
use_literal = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!use_literal) {
|
||||
r300_float = r300_float_tmp;
|
||||
use_literal = 1;
|
||||
}
|
||||
|
||||
/* Use RC_SWIZZLE_W for the inline constant, so
|
||||
* it will become one of the alpha sources. */
|
||||
SET_SWZ(new_swizzle, chan, RC_SWIZZLE_W);
|
||||
if (ret == -1) {
|
||||
negate_mask |= (1 << chan);
|
||||
}
|
||||
}
|
||||
|
||||
if (!use_literal) {
|
||||
continue;
|
||||
}
|
||||
src_reg->File = RC_FILE_INLINE;
|
||||
src_reg->Index = r300_float;
|
||||
src_reg->Swizzle = new_swizzle;
|
||||
src_reg->Negate = src_reg->Negate ^ negate_mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -268,7 +268,15 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
|
|||
pair->Alpha.Arg[i].Source = source;
|
||||
pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
|
||||
pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
|
||||
pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
|
||||
|
||||
if (istranscendent) {
|
||||
pair->Alpha.Arg[i].Negate =
|
||||
!!(inst->SrcReg[i].Negate &
|
||||
inst->DstReg.WriteMask);
|
||||
} else {
|
||||
pair->Alpha.Arg[i].Negate =
|
||||
!!(inst->SrcReg[i].Negate & RC_MASK_W);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -85,7 +85,12 @@ typedef enum {
|
|||
* Indicates this register should use the result of the presubtract
|
||||
* operation.
|
||||
*/
|
||||
RC_FILE_PRESUB
|
||||
RC_FILE_PRESUB,
|
||||
|
||||
/**
|
||||
* Indicates that the source index has been encoded as a 7-bit float.
|
||||
*/
|
||||
RC_FILE_INLINE
|
||||
} rc_register_file;
|
||||
|
||||
enum {
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ struct radeon_compiler;
|
|||
|
||||
struct rc_pair_instruction_source {
|
||||
unsigned int Used:1;
|
||||
unsigned int File:3;
|
||||
unsigned int File:4;
|
||||
unsigned int Index:RC_REGISTER_INDEX_BITS;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -109,6 +109,22 @@ static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func fun
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Print a 7-bit inline float (3-bit mantissa in bits 2:0, 4-bit exponent
 * with bias 7 in bits 6:3) as a decimal value followed by its raw encoding.
 *
 * \param f stream to print to.
 * \param index the 7-bit encoded value; bits above bit 6 are ignored for
 * the decoded value but still shown in the raw hex output.
 */
static void rc_print_inline_float(FILE * f, int index)
{
	/* Build the IEEE-754 bit pattern and read it back through a union.
	 * Reading an unsigned object through a float pointer violates the
	 * strict aliasing rules. */
	union {
		float f;
		unsigned u;
	} bits;
	int r300_exponent = (index >> 3) & 0xf;
	unsigned r300_mantissa = index & 0x7;
	unsigned float_exponent;

	/* Re-bias the exponent from 7 to the IEEE-754 bias of 127. */
	r300_exponent -= 7;
	float_exponent = r300_exponent + 127;
	/* The three mantissa bits land in the top of the 23-bit mantissa. */
	bits.u = (r300_mantissa << 20) | (float_exponent << 23);

	fprintf(f, "%f (0x%x)", bits.f, (unsigned)index);
}
|
||||
|
||||
static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
|
||||
{
|
||||
if (file == RC_FILE_NONE) {
|
||||
|
|
@ -118,6 +134,8 @@ static void rc_print_register(FILE * f, rc_register_file file, int index, unsign
|
|||
case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break;
|
||||
default: fprintf(f, "special[%i]", index); break;
|
||||
}
|
||||
} else if (file == RC_FILE_INLINE) {
|
||||
rc_print_inline_float(f, index);
|
||||
} else {
|
||||
const char * filename;
|
||||
switch(file) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue