mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 07:28:11 +02:00
r300_fragprog: Refactor TEX transformation
Streamlining source and destination registers, as well as texcoord scaling for RECT textures is now done in a radeon_program based transformation. The idea is that this will allow us to optimize away unnecessary indirections more easily.
This commit is contained in:
parent
b5170bc9d3
commit
e34dc8227c
5 changed files with 293 additions and 93 deletions
|
|
@ -50,6 +50,130 @@
|
|||
#include "r300_state.h"
|
||||
|
||||
|
||||
static void reset_srcreg(struct prog_src_register* reg)
|
||||
{
|
||||
_mesa_bzero(reg, sizeof(*reg));
|
||||
reg->Swizzle = SWIZZLE_NOOP;
|
||||
}
|
||||
|
||||
/**
|
||||
* Transform TEX, TXP, TXB, and KIL instructions in the following way:
|
||||
* - premultiply texture coordinates for RECT
|
||||
* - extract operand swizzles
|
||||
* - introduce a temporary register when write masks are needed
|
||||
*
|
||||
* \todo If/when r5xx uses the radeon_program architecture, this can probably
|
||||
* be reused.
|
||||
*/
|
||||
static GLboolean transform_TEX(
|
||||
struct radeon_program_transform_context* context,
|
||||
struct prog_instruction* orig_inst, void* data)
|
||||
{
|
||||
struct r300_fragment_program_compiler *compiler =
|
||||
(struct r300_fragment_program_compiler*)data;
|
||||
struct prog_instruction inst = *orig_inst;
|
||||
struct prog_instruction* tgt;
|
||||
GLboolean destredirect = GL_FALSE;
|
||||
|
||||
if (inst.Opcode != OPCODE_TEX &&
|
||||
inst.Opcode != OPCODE_TXB &&
|
||||
inst.Opcode != OPCODE_TXP &&
|
||||
inst.Opcode != OPCODE_KIL)
|
||||
return GL_FALSE;
|
||||
|
||||
/* Hardware uses [0..1]x[0..1] range for rectangle textures
|
||||
* instead of [0..Width]x[0..Height].
|
||||
* Add a scaling instruction.
|
||||
*/
|
||||
if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) {
|
||||
gl_state_index tokens[STATE_LENGTH] = {
|
||||
STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
|
||||
0
|
||||
};
|
||||
|
||||
int tempreg = radeonCompilerAllocateTemporary(context->compiler);
|
||||
int factor_index;
|
||||
|
||||
tokens[2] = inst.TexSrcUnit;
|
||||
factor_index =
|
||||
_mesa_add_state_reference(
|
||||
compiler->fp->mesa_program.Base.Parameters, tokens);
|
||||
|
||||
tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
|
||||
context->dest->NumInstructions, 1);
|
||||
|
||||
tgt->Opcode = OPCODE_MAD;
|
||||
tgt->DstReg.File = PROGRAM_TEMPORARY;
|
||||
tgt->DstReg.Index = tempreg;
|
||||
tgt->SrcReg[0] = inst.SrcReg[0];
|
||||
tgt->SrcReg[1].File = PROGRAM_STATE_VAR;
|
||||
tgt->SrcReg[1].Index = factor_index;
|
||||
tgt->SrcReg[2].File = PROGRAM_BUILTIN;
|
||||
tgt->SrcReg[2].Swizzle = SWIZZLE_0000;
|
||||
|
||||
reset_srcreg(&inst.SrcReg[0]);
|
||||
inst.SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
inst.SrcReg[0].Index = tempreg;
|
||||
}
|
||||
|
||||
/* Texture operations do not support swizzles etc. in hardware,
|
||||
* so emit an additional arithmetic operation if necessary.
|
||||
*/
|
||||
if (inst.SrcReg[0].Swizzle != SWIZZLE_NOOP ||
|
||||
inst.SrcReg[0].Abs || inst.SrcReg[0].NegateBase || inst.SrcReg[0].NegateAbs) {
|
||||
int tempreg = radeonCompilerAllocateTemporary(context->compiler);
|
||||
|
||||
tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
|
||||
context->dest->NumInstructions, 1);
|
||||
|
||||
tgt->Opcode = OPCODE_MAD;
|
||||
tgt->DstReg.File = PROGRAM_TEMPORARY;
|
||||
tgt->DstReg.Index = tempreg;
|
||||
tgt->SrcReg[0] = inst.SrcReg[0];
|
||||
tgt->SrcReg[1].File = PROGRAM_BUILTIN;
|
||||
tgt->SrcReg[1].Swizzle = SWIZZLE_1111;
|
||||
tgt->SrcReg[2].File = PROGRAM_BUILTIN;
|
||||
tgt->SrcReg[2].Swizzle = SWIZZLE_0000;
|
||||
|
||||
reset_srcreg(&inst.SrcReg[0]);
|
||||
inst.SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
inst.SrcReg[0].Index = tempreg;
|
||||
}
|
||||
|
||||
if (inst.Opcode != OPCODE_KIL) {
|
||||
if (inst.DstReg.File != PROGRAM_TEMPORARY ||
|
||||
inst.DstReg.WriteMask != WRITEMASK_XYZW) {
|
||||
int tempreg = radeonCompilerAllocateTemporary(context->compiler);
|
||||
|
||||
inst.DstReg.File = PROGRAM_TEMPORARY;
|
||||
inst.DstReg.Index = tempreg;
|
||||
inst.DstReg.WriteMask = WRITEMASK_XYZW;
|
||||
destredirect = GL_TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
|
||||
context->dest->NumInstructions, 1);
|
||||
_mesa_copy_instructions(tgt, &inst, 1);
|
||||
|
||||
if (destredirect) {
|
||||
tgt = radeonClauseInsertInstructions(context->compiler, context->dest,
|
||||
context->dest->NumInstructions, 1);
|
||||
|
||||
tgt->Opcode = OPCODE_MAD;
|
||||
tgt->DstReg = orig_inst->DstReg;
|
||||
tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
|
||||
tgt->SrcReg[0].Index = inst.DstReg.Index;
|
||||
tgt->SrcReg[1].File = PROGRAM_BUILTIN;
|
||||
tgt->SrcReg[1].Swizzle = SWIZZLE_1111;
|
||||
tgt->SrcReg[2].File = PROGRAM_BUILTIN;
|
||||
tgt->SrcReg[2].Swizzle = SWIZZLE_0000;
|
||||
}
|
||||
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
|
||||
static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp)
|
||||
{
|
||||
struct gl_fragment_program *mp = &fp->mesa_program;
|
||||
|
|
@ -170,6 +294,13 @@ void r300TranslateFragmentShader(r300ContextPtr r300,
|
|||
|
||||
insert_WPOS_trailer(&compiler);
|
||||
|
||||
struct radeon_program_transformation transformations[1] = {
|
||||
{ &transform_TEX, &compiler }
|
||||
};
|
||||
radeonClauseLocalTransform(&compiler.compiler,
|
||||
&compiler.compiler.Clauses[0],
|
||||
1, transformations);
|
||||
|
||||
if (!r300FragmentProgramEmit(&compiler))
|
||||
fp->error = GL_TRUE;
|
||||
|
||||
|
|
|
|||
|
|
@ -149,6 +149,7 @@ struct r300_fragment_program_compiler {
|
|||
struct radeon_compiler compiler;
|
||||
};
|
||||
|
||||
extern void r300FPTransformTextures(struct r300_fragment_program_compiler *compiler);
|
||||
extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler);
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -527,32 +527,6 @@ static GLuint get_temp_reg(struct r300_pfs_compile_state *cs)
|
|||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new Mesa temporary register that will act as the destination
|
||||
* register for a texture read.
|
||||
*/
|
||||
static GLuint get_temp_reg_tex(struct r300_pfs_compile_state *cs)
|
||||
{
|
||||
COMPILE_STATE;
|
||||
GLuint r = undef;
|
||||
GLuint index;
|
||||
|
||||
index = ffs(~cs->temp_in_use);
|
||||
if (!index) {
|
||||
ERROR("Out of program temps\n");
|
||||
return r;
|
||||
}
|
||||
|
||||
cs->temp_in_use |= (1 << --index);
|
||||
cs->temps[index].refcount = 0xFFFFFFFF;
|
||||
cs->temps[index].reg = get_hw_temp_tex(cs);
|
||||
|
||||
REG_SET_TYPE(r, REG_TYPE_TEMP);
|
||||
REG_SET_INDEX(r, index);
|
||||
REG_SET_VALID(r, GL_TRUE);
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
|
||||
* Free a Mesa temporary and the associated R300 temporary.
|
||||
*/
|
||||
|
|
@ -847,6 +821,15 @@ static GLuint t_src(struct r300_pfs_compile_state *cs,
|
|||
fp->mesa_program.Base.Parameters->
|
||||
ParameterValues[fpsrc.Index]);
|
||||
break;
|
||||
case PROGRAM_BUILTIN:
|
||||
switch(fpsrc.Swizzle) {
|
||||
case SWIZZLE_1111: r = pfs_one; break;
|
||||
case SWIZZLE_0000: r = pfs_zero; break;
|
||||
default:
|
||||
ERROR("bad PROGRAM_BUILTIN swizzle %u\n", fpsrc.Swizzle);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ERROR("unknown SrcReg->File %x\n", fpsrc.File);
|
||||
return r;
|
||||
|
|
@ -1003,56 +986,10 @@ static void emit_tex(struct r300_pfs_compile_state *cs,
|
|||
{
|
||||
COMPILE_STATE;
|
||||
GLuint coord = t_src(cs, fpi->SrcReg[0]);
|
||||
GLuint dest = undef, rdest = undef;
|
||||
GLuint dest = undef;
|
||||
GLuint din, uin;
|
||||
int unit = fpi->TexSrcUnit;
|
||||
int hwsrc, hwdest;
|
||||
GLuint tempreg = 0;
|
||||
|
||||
/**
|
||||
* Hardware uses [0..1]x[0..1] range for rectangle textures
|
||||
* instead of [0..Width]x[0..Height].
|
||||
* Add a scaling instruction.
|
||||
*
|
||||
* \todo Refactor this once we have proper rewriting/optimization
|
||||
* support for programs.
|
||||
*/
|
||||
if (opcode != R300_TEX_OP_KIL && fpi->TexSrcTarget == TEXTURE_RECT_INDEX) {
|
||||
gl_state_index tokens[STATE_LENGTH] = {
|
||||
STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
|
||||
0
|
||||
};
|
||||
int factor_index;
|
||||
GLuint factorreg;
|
||||
|
||||
tokens[2] = unit;
|
||||
factor_index =
|
||||
_mesa_add_state_reference(fp->mesa_program.Base.
|
||||
Parameters, tokens);
|
||||
factorreg =
|
||||
emit_const4fv(cs,
|
||||
fp->mesa_program.Base.Parameters->
|
||||
ParameterValues[factor_index]);
|
||||
tempreg = keep(get_temp_reg(cs));
|
||||
|
||||
emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW,
|
||||
coord, factorreg, pfs_zero, 0);
|
||||
|
||||
coord = tempreg;
|
||||
}
|
||||
|
||||
/* Texture operations do not support swizzles etc. in hardware,
|
||||
* so emit an additional arithmetic operation if necessary.
|
||||
*/
|
||||
if (REG_GET_VSWZ(coord) != SWIZZLE_XYZ ||
|
||||
REG_GET_SSWZ(coord) != SWIZZLE_W ||
|
||||
coord & (REG_NEGV_MASK | REG_NEGS_MASK | REG_ABS_MASK)) {
|
||||
assert(tempreg == 0);
|
||||
tempreg = keep(get_temp_reg(cs));
|
||||
emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW,
|
||||
coord, pfs_one, pfs_zero, 0);
|
||||
coord = tempreg;
|
||||
}
|
||||
|
||||
/* Ensure correct node indirection */
|
||||
uin = cs->used_in_node;
|
||||
|
|
@ -1064,15 +1001,6 @@ static void emit_tex(struct r300_pfs_compile_state *cs,
|
|||
if (opcode != R300_TEX_OP_KIL) {
|
||||
dest = t_dst(cs, fpi->DstReg);
|
||||
|
||||
/* r300 doesn't seem to be able to do TEX->output reg */
|
||||
if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) {
|
||||
rdest = dest;
|
||||
dest = get_temp_reg_tex(cs);
|
||||
} else if (fpi->DstReg.WriteMask != WRITEMASK_XYZW) {
|
||||
/* in case write mask isn't XYZW */
|
||||
rdest = dest;
|
||||
dest = get_temp_reg_tex(cs);
|
||||
}
|
||||
hwdest =
|
||||
t_hw_dst(cs, dest, GL_TRUE,
|
||||
code->node[code->cur_node].alu_offset);
|
||||
|
|
@ -1132,17 +1060,6 @@ static void emit_tex(struct r300_pfs_compile_state *cs,
|
|||
cs->used_in_node |= (1 << hwsrc);
|
||||
|
||||
code->node[code->cur_node].tex_end++;
|
||||
|
||||
/* Copy from temp to output if needed */
|
||||
if (REG_GET_VALID(rdest)) {
|
||||
emit_arith(cs, PFS_OP_MAD, rdest, fpi->DstReg.WriteMask, dest,
|
||||
pfs_one, pfs_zero, 0);
|
||||
free_temp(cs, dest);
|
||||
}
|
||||
|
||||
/* Free temp register */
|
||||
if (tempreg != 0)
|
||||
free_temp(cs, tempreg);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -149,3 +149,101 @@ void radeonCompilerEraseClauses(
|
|||
|
||||
_mesa_free(oldClauses);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Insert new instructions at the given position, initialize them as NOPs
|
||||
* and return a pointer to the first new instruction.
|
||||
*/
|
||||
struct prog_instruction* radeonClauseInsertInstructions(
|
||||
struct radeon_compiler *compiler,
|
||||
struct radeon_clause *clause,
|
||||
int position, int count)
|
||||
{
|
||||
int newNumInstructions = clause->NumInstructions + count;
|
||||
|
||||
assert(position >= 0 && position <= clause->NumInstructions);
|
||||
|
||||
if (newNumInstructions <= clause->ReservedInstructions) {
|
||||
memmove(clause->Instructions + position + count, clause->Instructions + position,
|
||||
(clause->NumInstructions - position) * sizeof(struct prog_instruction));
|
||||
} else {
|
||||
struct prog_instruction *oldInstructions = clause->Instructions;
|
||||
|
||||
clause->ReservedInstructions *= 2;
|
||||
if (newNumInstructions > clause->ReservedInstructions)
|
||||
clause->ReservedInstructions = newNumInstructions;
|
||||
|
||||
clause->Instructions = (struct prog_instruction*)
|
||||
_mesa_malloc(clause->ReservedInstructions * sizeof(struct prog_instruction));
|
||||
|
||||
if (oldInstructions) {
|
||||
_mesa_memcpy(clause->Instructions, oldInstructions,
|
||||
position * sizeof(struct prog_instruction));
|
||||
_mesa_memcpy(clause->Instructions + position + count, oldInstructions + position,
|
||||
(clause->NumInstructions - position) * sizeof(struct prog_instruction));
|
||||
|
||||
_mesa_free(oldInstructions);
|
||||
}
|
||||
}
|
||||
|
||||
clause->NumInstructions = newNumInstructions;
|
||||
_mesa_init_instructions(clause->Instructions + position, count);
|
||||
return clause->Instructions + position;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Transform the given clause in the following way:
|
||||
* 1. Replace it with an empty clause
|
||||
* 2. For every instruction in the original clause, try the given
|
||||
* transformations in order.
|
||||
* 3. If one of the transformations returns GL_TRUE, assume that it
|
||||
* has emitted the appropriate instruction(s) into the new clause;
|
||||
* otherwise, copy the instruction verbatim.
|
||||
*
|
||||
* \note The transformation is currently not recursive; in other words,
|
||||
* instructions emitted by transformations are not transformed.
|
||||
*
|
||||
* \note The transform is called 'local' because it can only look at
|
||||
* one instruction at a time.
|
||||
*/
|
||||
void radeonClauseLocalTransform(
|
||||
struct radeon_compiler *compiler,
|
||||
struct radeon_clause *clause,
|
||||
int num_transformations,
|
||||
struct radeon_program_transformation* transformations)
|
||||
{
|
||||
struct radeon_program_transform_context context;
|
||||
struct radeon_clause source;
|
||||
int ip;
|
||||
|
||||
source = *clause;
|
||||
clause->Instructions = 0;
|
||||
clause->NumInstructions = 0;
|
||||
clause->ReservedInstructions = 0;
|
||||
|
||||
context.compiler = compiler;
|
||||
context.dest = clause;
|
||||
context.src = &source;
|
||||
|
||||
for(ip = 0; ip < source.NumInstructions; ++ip) {
|
||||
struct prog_instruction *instr = source.Instructions + ip;
|
||||
int i;
|
||||
|
||||
for(i = 0; i < num_transformations; ++i) {
|
||||
struct radeon_program_transformation* t = transformations + i;
|
||||
|
||||
if (t->function(&context, instr, t->userData))
|
||||
break;
|
||||
}
|
||||
|
||||
if (i >= num_transformations) {
|
||||
struct prog_instruction *tgt =
|
||||
radeonClauseInsertInstructions(compiler, clause, clause->NumInstructions, 1);
|
||||
_mesa_copy_instructions(tgt, instr, 1);
|
||||
}
|
||||
}
|
||||
|
||||
_mesa_free_instructions(source.Instructions, source.NumInstructions);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,6 +41,13 @@ enum {
|
|||
CLAUSE_TEX
|
||||
};
|
||||
|
||||
/*
 * Additional register file value used by the radeon program code. It is
 * defined as PROGRAM_FILE_MAX, i.e. it lies outside the range of the
 * regular Mesa register files (presumably so it cannot collide with them).
 */
enum {
	PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */
};

/*
 * Swizzles that select the constant 0 (respectively 1) for all four
 * components. They are what gives a PROGRAM_BUILTIN source operand its
 * value.
 */
#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO)
#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE)
|
||||
|
||||
/**
|
||||
* A clause is simply a sequence of instructions that are executed
|
||||
* in order.
|
||||
|
|
@ -107,4 +114,50 @@ void radeonCompilerEraseClauses(
|
|||
int start,
|
||||
int end);
|
||||
|
||||
struct prog_instruction* radeonClauseInsertInstructions(
|
||||
struct radeon_compiler *compiler,
|
||||
struct radeon_clause *clause,
|
||||
int position, int count);
|
||||
|
||||
/**
 * Context passed to every transformation function while
 * radeonClauseLocalTransform rewrites a clause.
 */
struct radeon_program_transform_context {
	/** Compiler that owns the clause being transformed. */
	struct radeon_compiler *compiler;

	/**
	 * Destination clause where new instructions must be written.
	 */
	struct radeon_clause *dest;

	/**
	 * Original clause that is currently being transformed.
	 */
	struct radeon_clause *src;
};
|
||||
|
||||
/**
 * A transformation that can be passed to \ref radeonClauseLocalTransform.
 *
 * The function will be called once for each instruction.
 * It has to either emit the appropriate transformed code for the instruction
 * and return GL_TRUE, or return GL_FALSE if it doesn't understand the
 * instruction.
 *
 * The function gets passed the userData as last parameter.
 */
struct radeon_program_transformation {
	/**
	 * Callback; receives the transform context, the instruction to
	 * examine and \ref userData, in that order.
	 */
	GLboolean (*function)(
		struct radeon_program_transform_context*,
		struct prog_instruction*,
		void*);
	/** Opaque pointer handed unchanged to \ref function. */
	void *userData;
};
|
||||
|
||||
void radeonClauseLocalTransform(
|
||||
struct radeon_compiler *compiler,
|
||||
struct radeon_clause *clause,
|
||||
int num_transformations,
|
||||
struct radeon_program_transformation* transformations);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue