Improved register allocation: allow up to four 'float' variables or temporaries to share a single register, one per component. Clean-up still needed.

This commit is contained in:
Brian 2007-01-27 20:06:41 -07:00
parent ea8b68e0f7
commit d6772f157a
3 changed files with 136 additions and 63 deletions

View file

@ -40,7 +40,7 @@
#define PEEPHOLE_OPTIMIZATIONS 1
#define ANNOTATE 0
#define ANNOTATE 1
/**
@ -336,11 +336,14 @@ static void
alloc_temp_storage(slang_var_table *vt, slang_ir_node *n, GLint size)
{
GLint indx;
GLuint swizzle;
assert(!n->Var);
assert(!n->Store);
assert(size > 0);
indx = _slang_alloc_temp(vt, size);
indx = _slang_alloc_temp(vt, size, &swizzle);
n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, indx, size);
if (n->Store)
n->Store->Swizzle = swizzle;
}
@ -352,8 +355,8 @@ static void
free_temp_storage(slang_var_table *vt, slang_ir_node *n)
{
if (n->Store->File == PROGRAM_TEMPORARY && n->Store->Index >= 0) {
if (_slang_is_temp(vt, n->Store->Index)) {
_slang_free_temp(vt, n->Store->Index, n->Store->Size);
if (_slang_is_temp(vt, n->Store->Index, n->Store->Swizzle)) {
_slang_free_temp(vt, n->Store->Index, n->Store->Size, n->Store->Swizzle);
/* XXX free(store)? */
n->Store->Index = -1;
n->Store->Size = -1;
@ -381,7 +384,15 @@ storage_to_dst_reg(struct prog_dst_register *dst, const slang_ir_storage *st,
assert(st->File != PROGRAM_UNDEFINED);
assert(st->Size >= 1);
assert(st->Size <= 4);
dst->WriteMask = defaultWritemask[st->Size - 1] & writemask;
if (st->Size == 1) {
GLuint comp = GET_SWZ(st->Swizzle, 0);
assert(comp < 4);
assert(writemask & WRITEMASK_X);
dst->WriteMask = WRITEMASK_X << comp;
}
else {
dst->WriteMask = defaultWritemask[st->Size - 1] & writemask;
}
}
@ -803,13 +814,15 @@ emit_move(slang_var_table *vt, slang_ir_node *n, struct gl_program *prog)
emit(vt, n->Children[0], prog);
#if PEEPHOLE_OPTIMIZATIONS
if (inst && _slang_is_temp(vt, n->Children[1]->Store->Index)) {
if (inst && _slang_is_temp(vt, n->Children[1]->Store->Index,
n->Children[1]->Store->Swizzle)) {
/* Peephole optimization:
* Just modify the RHS to put its result into the dest of this
* MOVE operation. Then, this MOVE is a no-op.
*/
_slang_free_temp(vt, n->Children[1]->Store->Index,
n->Children[1]->Store->Size);
n->Children[1]->Store->Size,
n->Children[1]->Store->Swizzle);
*n->Children[1]->Store = *n->Children[0]->Store;
/* fixup the prev (RHS) instruction */
assert(n->Children[0]->Store->Index >= 0);
@ -852,12 +865,7 @@ emit_move(slang_var_table *vt, slang_ir_node *n, struct gl_program *prog)
inst->Comment = instruction_annotation(inst->Opcode, dstAnnot,
srcAnnot, NULL);
}
/* XXX is this test correct? */
if (_slang_is_temp(vt, n->Children[1]->Store->Index)) {
_slang_free_temp(vt, n->Children[1]->Store->Index,
n->Children[1]->Store->Size);
}
/*inst->Comment = _mesa_strdup("IR_MOVE");*/
free_temp_storage(vt, n->Children[1]);
assert(!n->Store);
n->Store = n->Children[0]->Store; /*XXX new */
return inst;
@ -883,13 +891,17 @@ emit_cond(slang_var_table *vt, slang_ir_node *n, struct gl_program *prog)
/* This'll happen for things like "if (i) ..." where no code
* is normally generated for the expression "i".
* Generate a move instruction just to set condition codes.
* Note: must use full 4-component vector since all four
* condition codes must be set identically.
*/
alloc_temp_storage(vt, n, 1);
alloc_temp_storage(vt, n, 4);
inst = new_instruction(prog, OPCODE_MOV);
inst->CondUpdate = GL_TRUE;
storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask);
storage_to_src_reg(&inst->SrcReg[0], n->Children[0]->Store);
_slang_free_temp(vt, n->Store->Index, n->Store->Size);
_slang_free_temp(vt, n->Store->Index, n->Store->Size,
n->Store->Swizzle);
inst->Comment = _mesa_strdup("COND expr");
return inst; /* XXX or null? */
}
}
@ -928,12 +940,16 @@ emit(slang_var_table *vt, slang_ir_node *n, struct gl_program *prog)
assert(n->Store->Index < 0);
if (!n->Var || n->Var->isTemp) {
/* a nameless/temporary variable, will be freed after first use */
n->Store->Index = _slang_alloc_temp(vt, n->Store->Size);
GLuint swizzle;
n->Store->Index = _slang_alloc_temp(vt, n->Store->Size, &swizzle);
n->Store->Swizzle = swizzle;
}
else {
/* a regular variable */
GLuint swizzle;
_slang_add_variable(vt, n->Var);
n->Store->Index = _slang_alloc_var(vt, n->Store->Size);
n->Store->Index = _slang_alloc_var(vt, n->Store->Size, &swizzle);
n->Store->Swizzle = swizzle;
/*
printf("IR_VAR_DECL %s %d store %p\n",
(char*) n->Var->a_name, n->Store->Index, (void*) n->Store);

View file

@ -4,6 +4,7 @@
#include "slang_compile_variable.h"
#include "slang_vartable.h"
#include "slang_ir.h"
#include "prog_instruction.h"
static int dbg = 0;
@ -23,7 +24,8 @@ struct slang_var_table_
int num_entries;
slang_variable **vars; /* array [num_entries] */
TempState temps[MAX_PROGRAM_TEMPS];
TempState temps[MAX_PROGRAM_TEMPS * 4];
int size[MAX_PROGRAM_TEMPS];
struct slang_var_table_ *parent;
};
@ -46,6 +48,7 @@ _slang_push_var_table(slang_var_table *parent)
if (parent) {
/* copy the info indicating which temp regs are in use */
memcpy(t->temps, parent->temps, sizeof(t->temps));
memcpy(t->size, parent->size, sizeof(t->size));
}
if (dbg) printf("Pushing level %d\n", t->level);
}
@ -67,12 +70,22 @@ _slang_pop_var_table(slang_var_table *t)
/* free the storage allocated for each variable */
for (i = 0; i < t->num_entries; i++) {
slang_ir_storage *store = (slang_ir_storage *) t->vars[i]->aux;
GLint j, sz4 = (store->Size + 3) / 4;
if (dbg) printf(" Free var %s, size %d\n",
(char*) t->vars[i]->a_name, store->Size);
for (j = 0; j < sz4; j++) {
assert(t->temps[store->Index + j] == VAR);
t->temps[store->Index + j] = FREE;
GLint j;
const GLuint sz = store->Size;
GLuint comp;
if (dbg) printf(" Free var %s, size %d at %d\n",
(char*) t->vars[i]->a_name, store->Size,
store->Index);
if (sz == 1)
comp = GET_SWZ(store->Swizzle, 0);
else
comp = 0;
assert(store->Index >= 0);
for (j = 0; j < sz; j++) {
assert(t->temps[store->Index * 4 + j + comp] == VAR);
t->temps[store->Index * 4 + j + comp] = FREE;
}
store->Index = -1;
}
@ -80,9 +93,9 @@ _slang_pop_var_table(slang_var_table *t)
/* just verify that any remaining allocations in this scope
* were for temps
*/
for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
for (i = 0; i < MAX_PROGRAM_TEMPS * 4; i++) {
if (t->temps[i] && !t->parent->temps[i]) {
if (dbg) printf(" Free reg %d\n", i);
if (dbg) printf(" Free reg %d\n", i/4);
assert(t->temps[i] == TEMP);
}
}
@ -131,16 +144,22 @@ _slang_find_variable(const slang_var_table *t, slang_atom name)
}
/**
* Allocation helper.
* \param size var size in floats
* \return position for var, measured in floats
*/
static GLint
alloc_reg(slang_var_table *t, GLint size, GLboolean isTemp)
{
const GLuint sz4 = (size + 3) / 4;
/* if size == 1, allocate anywhere, else, pos must be multiple of 4 */
const GLuint step = (size == 1) ? 1 : 4;
GLuint i, j;
assert(size > 0); /* number of floats */
for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
for (i = 0; i < MAX_PROGRAM_TEMPS - size; i += step) {
GLuint found = 0;
for (j = 0; j < sz4; j++) {
for (j = 0; j < size; j++) {
if (i + j < MAX_PROGRAM_TEMPS && !t->temps[i + j]) {
found++;
}
@ -148,10 +167,14 @@ alloc_reg(slang_var_table *t, GLint size, GLboolean isTemp)
break;
}
}
if (found == sz4) {
/* found block of size/4 free regs */
for (j = 0; j < sz4; j++)
if (found == size) {
/* found block of size free regs */
if (size > 1)
assert(i % 4 == 0);
for (j = 0; j < size; j++)
t->temps[i + j] = isTemp ? TEMP : VAR;
printf("t->size[%d] = %d\n", i, size);
t->size[i] = size;
return i;
}
}
@ -161,61 +184,98 @@ alloc_reg(slang_var_table *t, GLint size, GLboolean isTemp)
/**
* Allocate temp register(s) for storing a variable.
* \param size size needed, in floats
* \param swizzle returns swizzle mask for accessing var in register
* \return register allocated, or -1
*/
GLint
_slang_alloc_var(slang_var_table *t, GLint size)
_slang_alloc_var(slang_var_table *t, GLint size, GLuint *swizzle)
{
int i = alloc_reg(t, size, GL_FALSE);
if (dbg) printf("Alloc var %d (level %d)\n", i, t->level);
return i;
}
if (i < 0)
return -1;
void
_slang_reserve_var(slang_var_table *t, GLint r, GLint size)
{
const GLint sz4 = (size + 3) / 4;
GLint i;
for (i = 0; i < sz4; i++) {
t->temps[r + i] = VAR;
if (size == 1) {
GLuint comp = i % 4;
*swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
char swz = "xyzw"[comp];
if (dbg) printf("Alloc var sz %d at %d.%c (level %d)\n", size, i/4, swz, t->level);
}
else {
*swizzle = SWIZZLE_NOOP;
if (dbg) printf("Alloc var sz %d at %d.xyzw (level %d)\n", size, i/4, t->level);
}
return i / 4;
}
/**
* Allocate temp register(s) for storing an unnamed intermediate value.
*/
GLint
_slang_alloc_temp(slang_var_table *t, GLint size)
_slang_alloc_temp(slang_var_table *t, GLint size, GLuint *swizzle)
{
int i = alloc_reg(t, size, GL_TRUE);
if (dbg) printf("Alloc temp %d (level %d)\n", i, t->level);
return i;
if (i < 0)
return -1;
if (size == 1) {
GLuint comp = i % 4;
assert(comp < 4);
int swz = "xyzw"[comp];
*swizzle = MAKE_SWIZZLE4(comp, comp, comp, comp);
if (dbg) printf("Alloc temp sz %d at %d.%c (level %d)\n",
size, i/4, swz, t->level);
}
else {
*swizzle = SWIZZLE_NOOP;
if (dbg) printf("Alloc temp sz %d at %d.xyzw (level %d)\n",
size, i/4, t->level);
}
return i / 4;
}
void
_slang_free_temp(slang_var_table *t, GLint r, GLint size)
_slang_free_temp(slang_var_table *t, GLint r, GLint size, GLuint swizzle)
{
const GLuint sz4 = (size + 3) / 4;
GLuint i;
assert(size > 0);
assert(r >= 0);
assert(r < MAX_PROGRAM_TEMPS);
if (dbg) printf("Free temp %d (level %d)\n", r, t->level);
for (i = 0; i < sz4; i++) {
assert(t->temps[r + i] == TEMP);
t->temps[r + i] = FREE;
assert(r + size <= MAX_PROGRAM_TEMPS);
if (dbg) printf("Free temp sz %d at %d (level %d)\n", size, r, t->level);
if (size == 1) {
GLuint comp = GET_SWZ(swizzle, 0);
assert(swizzle == MAKE_SWIZZLE4(comp, comp, comp, comp));
assert(comp < 4);
assert(t->size[r * 4 + comp] == 1);
assert(t->temps[r * 4 + comp] == TEMP);
t->temps[r * 4 + comp] = FREE;
}
else {
assert(swizzle == SWIZZLE_NOOP);
assert(t->size[r*4] == size);
for (i = 0; i < size; i++) {
assert(t->temps[r * 4 + i] == TEMP);
t->temps[r * 4 + i] = FREE;
}
}
}
GLboolean
_slang_is_temp(slang_var_table *t, GLint r)
_slang_is_temp(slang_var_table *t, GLint r, GLuint swizzle)
{
assert(r >= 0);
assert(r < MAX_PROGRAM_TEMPS);
if (t->temps[r] == TEMP)
GLuint comp;
if (swizzle == SWIZZLE_NOOP)
comp = 0;
else
comp = GET_SWZ(swizzle, 0);
if (t->temps[r * 4 + comp] == TEMP)
return GL_TRUE;
else
return GL_FALSE;

View file

@ -20,19 +20,16 @@ extern struct slang_variable_ *
_slang_find_variable(const slang_var_table *t, slang_atom name);
extern GLint
_slang_alloc_var(slang_var_table *t, GLint size);
extern void
_slang_reserve_var(slang_var_table *t, GLint r, GLint size);
_slang_alloc_var(slang_var_table *t, GLint size, GLuint *swizzle);
extern GLint
_slang_alloc_temp(slang_var_table *t, GLint size);
_slang_alloc_temp(slang_var_table *t, GLint size, GLuint *swizzle);
extern void
_slang_free_temp(slang_var_table *t, GLint r, GLint size);
_slang_free_temp(slang_var_table *t, GLint r, GLint size, GLuint swizzle);
extern GLboolean
_slang_is_temp(slang_var_table *t, GLint r);
_slang_is_temp(slang_var_table *t, GLint r, GLuint swizzle);
#endif /* SLANG_VARTABLE_H */