Merge branch 'const-buffer-changes'

Conflicts:

	src/mesa/drivers/dri/i965/brw_curbe.c
	src/mesa/drivers/dri/i965/brw_vs_emit.c
	src/mesa/drivers/dri/i965/brw_wm_glsl.c
This commit is contained in:
Brian Paul 2009-05-01 09:37:14 -06:00
commit b9196c1fa3
19 changed files with 507 additions and 234 deletions

View file

@ -73,7 +73,7 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state)
p->params_uptodate = 0;
}
if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM))
if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))
i915_update_fog(ctx);
}

View file

@ -245,6 +245,9 @@ struct brw_vs_ouput_sizes {
};
/** Number of general purpose registers (VS, WM, etc) */
#define BRW_MAX_GRF 128
/** Number of texture sampler units */
#define BRW_MAX_TEX_UNIT 16
@ -450,8 +453,6 @@ struct brw_context
struct {
struct brw_state_flags dirty;
struct brw_tracked_state **atoms;
GLuint nr_atoms;
GLuint nr_color_regions;
struct intel_region *color_regions[MAX_DRAW_BUFFERS];
@ -471,7 +472,8 @@ struct brw_context
int validated_bo_count;
} state;
struct brw_cache cache;
struct brw_cache cache; /** non-surface items */
struct brw_cache surface_cache; /* surface items */
struct brw_cached_batch_item *cached_batch_items;
struct {
@ -555,11 +557,6 @@ struct brw_context
GLuint vs_size;
GLuint total_size;
/* Dynamic tracker which changes to reflect the state referenced
* by active fp and vp program parameters:
*/
struct brw_tracked_state tracked_state;
dri_bo *curbe_bo;
/** Offset within curbe_bo of space for current curbe entry */
GLuint curbe_offset;

View file

@ -36,6 +36,7 @@
#include "main/macros.h"
#include "main/enums.h"
#include "shader/prog_parameter.h"
#include "shader/prog_print.h"
#include "shader/prog_statevars.h"
#include "intel_batchbuffer.h"
#include "intel_regions.h"
@ -188,13 +189,6 @@ static void prepare_constant_buffer(struct brw_context *brw)
GLfloat *buf;
GLuint i;
/* Update our own dependency flags. This works because this
* function will also be called whenever fp or vp changes.
*/
brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION);
brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags;
brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags;
if (sz == 0) {
if (brw->curbe.last_buf) {
free(brw->curbe.last_buf);
@ -363,11 +357,7 @@ update_constant_buffer(struct brw_context *brw,
}
if (0) {
int i;
for (i = 0; i < params->NumParameters; i++) {
float *p = params->ParameterValues[i];
printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]);
}
_mesa_print_parameter_list(params);
}
}
}
@ -380,7 +370,7 @@ update_vertex_constant_buffer(struct brw_context *brw)
struct brw_vertex_program *vp =
(struct brw_vertex_program *) brw->vertex_program;
if (0) {
printf("update VS constants in buffer %p\n", vp->const_buffer);
printf("update VS constants in buffer %p vp = %p\n", vp->const_buffer, vp);
printf("program %u\n", vp->program.Base.Id);
}
if (vp->use_const_buffer)
@ -394,6 +384,10 @@ update_fragment_constant_buffer(struct brw_context *brw)
{
struct brw_fragment_program *fp =
(struct brw_fragment_program *) brw->fragment_program;
if (0) {
printf("update WM constants in buffer %p\n", fp->const_buffer);
printf("program %u\n", fp->program.Base.Id);
}
if (fp->use_const_buffer)
update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer);
}
@ -428,7 +422,7 @@ static void emit_constant_buffer(struct brw_context *brw)
*/
const struct brw_tracked_state brw_constant_buffer = {
.dirty = {
.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */
.mesa = _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */

View file

@ -135,8 +135,8 @@ dri_bo *brw_search_cache( struct brw_cache *cache,
void *aux_return);
void brw_state_cache_check_size( struct brw_context *brw );
void brw_init_cache( struct brw_context *brw );
void brw_destroy_cache( struct brw_context *brw );
void brw_init_caches( struct brw_context *brw );
void brw_destroy_caches( struct brw_context *brw );
/***********************************************************************
* brw_state_batch.c

View file

@ -56,9 +56,9 @@
* incorrect program is run for the other instance.
*/
#include "main/imports.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
#include "main/imports.h"
/* XXX: Fixme - have to include these to get the sizes of the prog_key
* structs:
@ -69,8 +69,10 @@
#include "brw_sf.h"
#include "brw_gs.h"
static GLuint hash_key( const void *key, GLuint key_size,
dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
static GLuint
hash_key(const void *key, GLuint key_size,
dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
{
GLuint *ikey = (GLuint *)key;
GLuint hash = 0, i;
@ -95,6 +97,7 @@ static GLuint hash_key( const void *key, GLuint key_size,
return hash;
}
/**
* Marks a new buffer as being chosen for the given cache id.
*/
@ -111,6 +114,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
cache->brw->state.dirty.cache |= 1 << cache_id;
}
static struct brw_cache_item *
search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
GLuint hash, const void *key, GLuint key_size,
@ -143,7 +147,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
}
static void rehash( struct brw_cache *cache )
static void
rehash(struct brw_cache *cache)
{
struct brw_cache_item **items;
struct brw_cache_item *c, *next;
@ -164,15 +169,17 @@ static void rehash( struct brw_cache *cache )
cache->size = size;
}
/**
* Returns the buffer object matching cache_id and key, or NULL.
*/
dri_bo *brw_search_cache( struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint key_size,
dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
void *aux_return )
dri_bo *
brw_search_cache(struct brw_cache *cache,
enum brw_cache_id cache_id,
const void *key,
GLuint key_size,
dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
void *aux_return)
{
struct brw_cache_item *item;
GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
@ -192,6 +199,7 @@ dri_bo *brw_search_cache( struct brw_cache *cache,
return item->bo;
}
dri_bo *
brw_upload_cache( struct brw_cache *cache,
enum brw_cache_id cache_id,
@ -265,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache,
return bo;
}
/* This doesn't really work with aux data. Use search/upload instead
/**
* This doesn't really work with aux data. Use search/upload instead
*/
dri_bo *
brw_cache_data_sz(struct brw_cache *cache,
@ -296,6 +306,7 @@ brw_cache_data_sz(struct brw_cache *cache,
return bo;
}
/**
* Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
*
@ -319,21 +330,22 @@ enum pool_type {
DW_GENERAL_STATE
};
static void
brw_init_cache_id( struct brw_context *brw,
const char *name,
enum brw_cache_id id,
GLuint key_size,
GLuint aux_size)
{
struct brw_cache *cache = &brw->cache;
static void
brw_init_cache_id(struct brw_cache *cache,
const char *name,
enum brw_cache_id id,
GLuint key_size,
GLuint aux_size)
{
cache->name[id] = strdup(name);
cache->key_size[id] = key_size;
cache->aux_size[id] = aux_size;
}
void brw_init_cache( struct brw_context *brw )
static void
brw_init_non_surface_cache(struct brw_context *brw)
{
struct brw_cache *cache = &brw->cache;
@ -342,114 +354,136 @@ void brw_init_cache( struct brw_context *brw )
cache->size = 7;
cache->n_items = 0;
cache->items = (struct brw_cache_item **)
_mesa_calloc(cache->size *
sizeof(struct brw_cache_item));
_mesa_calloc(cache->size * sizeof(struct brw_cache_item));
brw_init_cache_id(brw,
brw_init_cache_id(cache,
"CC_VP",
BRW_CC_VP,
sizeof(struct brw_cc_viewport),
0);
brw_init_cache_id(brw,
brw_init_cache_id(cache,
"CC_UNIT",
BRW_CC_UNIT,
sizeof(struct brw_cc_unit_state),
0);
brw_init_cache_id(brw,
brw_init_cache_id(cache,
"WM_PROG",
BRW_WM_PROG,
sizeof(struct brw_wm_prog_key),
sizeof(struct brw_wm_prog_data));
brw_init_cache_id(brw,
brw_init_cache_id(cache,
"SAMPLER_DEFAULT_COLOR",
BRW_SAMPLER_DEFAULT_COLOR,
sizeof(struct brw_sampler_default_color),
0);
brw_init_cache_id(brw,
brw_init_cache_id(cache,
"SAMPLER",
BRW_SAMPLER,
0, /* variable key/data size */
0);
brw_init_cache_id(brw,
brw_init_cache_id(cache,
"WM_UNIT",
BRW_WM_UNIT,
sizeof(struct brw_wm_unit_state),
0);
brw_init_cache_id(brw,
brw_init_cache_id(cache,
"SF_PROG",
BRW_SF_PROG,
sizeof(struct brw_sf_prog_key),
sizeof(struct brw_sf_prog_data));
brw_init_cache_id(brw,
brw_init_cache_id(cache,
"SF_VP",
BRW_SF_VP,
sizeof(struct brw_sf_viewport),
0);
brw_init_cache_id(brw,
brw_init_cache_id(cache,
"SF_UNIT",
BRW_SF_UNIT,
sizeof(struct brw_sf_unit_state),
0);
brw_init_cache_id(brw,
brw_init_cache_id(cache,
"VS_UNIT",
BRW_VS_UNIT,
sizeof(struct brw_vs_unit_state),
0);
brw_init_cache_id(brw,
brw_init_cache_id(cache,
"VS_PROG",
BRW_VS_PROG,
sizeof(struct brw_vs_prog_key),
sizeof(struct brw_vs_prog_data));
brw_init_cache_id(brw,
brw_init_cache_id(cache,
"CLIP_UNIT",
BRW_CLIP_UNIT,
sizeof(struct brw_clip_unit_state),
0);
brw_init_cache_id(brw,
brw_init_cache_id(cache,
"CLIP_PROG",
BRW_CLIP_PROG,
sizeof(struct brw_clip_prog_key),
sizeof(struct brw_clip_prog_data));
brw_init_cache_id(brw,
brw_init_cache_id(cache,
"GS_UNIT",
BRW_GS_UNIT,
sizeof(struct brw_gs_unit_state),
0);
brw_init_cache_id(brw,
brw_init_cache_id(cache,
"GS_PROG",
BRW_GS_PROG,
sizeof(struct brw_gs_prog_key),
sizeof(struct brw_gs_prog_data));
}
brw_init_cache_id(brw,
static void
brw_init_surface_cache(struct brw_context *brw)
{
struct brw_cache *cache = &brw->surface_cache;
cache->brw = brw;
cache->size = 7;
cache->n_items = 0;
cache->items = (struct brw_cache_item **)
_mesa_calloc(cache->size * sizeof(struct brw_cache_item));
brw_init_cache_id(cache,
"SS_SURFACE",
BRW_SS_SURFACE,
sizeof(struct brw_surface_state),
0);
brw_init_cache_id(brw,
brw_init_cache_id(cache,
"SS_SURF_BIND",
BRW_SS_SURF_BIND,
0,
0);
}
/**
 * Initialize both state caches owned by the context: first the
 * non-surface cache (brw->cache), then the surface cache
 * (brw->surface_cache).
 *
 * \param brw  the context whose caches are set up
 */
void
brw_init_caches(struct brw_context *brw)
{
brw_init_non_surface_cache(brw);
brw_init_surface_cache(brw);
}
static void
brw_clear_cache( struct brw_context *brw )
brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
{
struct brw_cache_item *c, *next;
GLuint i;
@ -457,8 +491,8 @@ brw_clear_cache( struct brw_context *brw )
if (INTEL_DEBUG & DEBUG_STATE)
_mesa_printf("%s\n", __FUNCTION__);
for (i = 0; i < brw->cache.size; i++) {
for (c = brw->cache.items[i]; c; c = next) {
for (i = 0; i < cache->size; i++) {
for (c = cache->items[i]; c; c = next) {
int j;
next = c->next;
@ -468,10 +502,10 @@ brw_clear_cache( struct brw_context *brw )
free((void *)c->key);
free(c);
}
brw->cache.items[i] = NULL;
cache->items[i] = NULL;
}
brw->cache.n_items = 0;
cache->n_items = 0;
if (brw->curbe.last_buf) {
_mesa_free(brw->curbe.last_buf);
@ -483,25 +517,40 @@ brw_clear_cache( struct brw_context *brw )
brw->state.dirty.cache |= ~0;
}
void brw_state_cache_check_size( struct brw_context *brw )
void
brw_state_cache_check_size(struct brw_context *brw)
{
/* un-tuned guess. We've got around 20 state objects for a total of around
* 32k, so 1000 of them is around 1.5MB.
*/
if (brw->cache.n_items > 1000)
brw_clear_cache(brw);
brw_clear_cache(brw, &brw->cache);
if (brw->surface_cache.n_items > 1000)
brw_clear_cache(brw, &brw->surface_cache);
}
void brw_destroy_cache( struct brw_context *brw )
static void
brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
{
GLuint i;
brw_clear_cache(brw);
brw_clear_cache(brw, cache);
for (i = 0; i < BRW_MAX_CACHE; i++) {
dri_bo_unreference(brw->cache.last_bo[i]);
free(brw->cache.name[i]);
dri_bo_unreference(cache->last_bo[i]);
free(cache->name[i]);
}
free(brw->cache.items);
brw->cache.items = NULL;
brw->cache.size = 0;
free(cache->items);
cache->items = NULL;
cache->size = 0;
}
/**
 * Tear down both state caches owned by the context by calling
 * brw_destroy_cache() on the non-surface cache (brw->cache) and then on
 * the surface cache (brw->surface_cache).
 *
 * \param brw  the context whose caches are destroyed
 */
void
brw_destroy_caches(struct brw_context *brw)
{
brw_destroy_cache(brw, &brw->cache);
brw_destroy_cache(brw, &brw->surface_cache);
}

View file

@ -59,7 +59,6 @@ const struct brw_tracked_state *atoms[] =
&brw_curbe_offsets,
&brw_recalculate_urb_fence,
&brw_cc_vp,
&brw_cc_unit,
@ -88,54 +87,26 @@ const struct brw_tracked_state *atoms[] =
&brw_line_stipple,
&brw_aa_line_parameters,
/* Ordering of the commands below is documented as fixed.
*/
#if 0
&brw_pipelined_state_pointers,
&brw_urb_fence,
&brw_constant_buffer_state,
#else
&brw_psp_urb_cbs,
#endif
&brw_drawing_rect,
&brw_indices,
&brw_vertices,
NULL, /* brw_constant_buffer */
&brw_constant_buffer
};
void brw_init_state( struct brw_context *brw )
{
GLuint i;
brw_init_cache(brw);
brw->state.atoms = _mesa_malloc(sizeof(atoms));
brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms);
_mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms));
/* Patch in a pointer to the dynamic state atom:
*/
for (i = 0; i < brw->state.nr_atoms; i++)
if (brw->state.atoms[i] == NULL)
brw->state.atoms[i] = &brw->curbe.tracked_state;
_mesa_memcpy(&brw->curbe.tracked_state,
&brw_constant_buffer,
sizeof(brw_constant_buffer));
brw_init_caches(brw);
}
void brw_destroy_state( struct brw_context *brw )
{
if (brw->state.atoms) {
_mesa_free(brw->state.atoms);
brw->state.atoms = NULL;
}
brw_destroy_cache(brw);
brw_destroy_caches(brw);
brw_destroy_batch_cache(brw);
}
@ -218,6 +189,7 @@ static struct dirty_bit_map mesa_bits[] = {
DEFINE_BIT(_NEW_MULTISAMPLE),
DEFINE_BIT(_NEW_TRACK_MATRIX),
DEFINE_BIT(_NEW_PROGRAM),
DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
{0, 0, 0}
};
@ -336,7 +308,7 @@ void brw_validate_state( struct brw_context *brw )
/* do prepare stage for all atoms */
for (i = 0; i < Elements(atoms); i++) {
const struct brw_tracked_state *atom = brw->state.atoms[i];
const struct brw_tracked_state *atom = atoms[i];
if (brw->intel.Fallback)
break;
@ -367,8 +339,8 @@ void brw_upload_state(struct brw_context *brw)
_mesa_memset(&examined, 0, sizeof(examined));
prev = *state;
for (i = 0; i < brw->state.nr_atoms; i++) {
const struct brw_tracked_state *atom = brw->state.atoms[i];
for (i = 0; i < Elements(atoms); i++) {
const struct brw_tracked_state *atom = atoms[i];
struct brw_state_flags generated;
assert(atom->dirty.mesa ||
@ -397,7 +369,7 @@ void brw_upload_state(struct brw_context *brw)
}
else {
for (i = 0; i < Elements(atoms); i++) {
const struct brw_tracked_state *atom = brw->state.atoms[i];
const struct brw_tracked_state *atom = atoms[i];
if (brw->intel.Fallback)
break;

View file

@ -69,13 +69,18 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
{
GLuint i, reg = 0, mrf;
#if 0
if (c->vp->program.Base.Parameters->NumParameters >= 6)
c->vp->use_const_buffer = 1;
/* Determine whether to use a real constant buffer or use a block
* of GRF registers for constants. The latter is faster but only
* works if everything fits in the GRF.
* XXX this heuristic/check may need some fine tuning...
*/
if (c->vp->program.Base.Parameters->NumParameters +
c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF)
c->vp->use_const_buffer = GL_TRUE;
else
#endif
c->vp->use_const_buffer = GL_FALSE;
/*printf("use_const_buffer = %d\n", c->use_const_buffer);*/
/*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
/* r0 -- reserved as usual
*/

View file

@ -240,15 +240,18 @@ struct brw_wm_compile {
GLuint max_wm_grf;
GLuint last_scratch;
GLuint cur_inst; /**< index of current instruction */
/** Mapping from Mesa registers to hardware registers */
struct {
GLboolean inited;
struct brw_reg reg;
} wm_regs[PROGRAM_PAYLOAD+1][256][4];
GLboolean used_grf[BRW_WM_MAX_GRF];
GLuint first_free_grf;
struct brw_reg stack;
struct brw_reg emit_mask_reg;
GLuint reg_index; /**< Index of next free GRF register */
GLuint tmp_regs[BRW_WM_MAX_GRF];
GLuint tmp_index;
GLuint tmp_max;

View file

@ -1,5 +1,7 @@
#include "main/macros.h"
#include "shader/prog_parameter.h"
#include "shader/prog_print.h"
#include "shader/prog_optimize.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
@ -42,6 +44,76 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
}
static void
reclaim_temps(struct brw_wm_compile *c);
/**
 * Mark a GRF register as used.
 *
 * \param c  the WM compile state holding the used_grf[] table
 * \param r  index of the GRF register to reserve; must be a valid index
 *           into c->used_grf[]
 */
static void
prealloc_grf(struct brw_wm_compile *c, int r)
{
   /* used_grf[] is a fixed-size table: catch a bad register number in
    * debug builds instead of silently writing outside the array.
    */
   assert(r >= 0 &&
          r < (int) (sizeof(c->used_grf) / sizeof(c->used_grf[0])));

   c->used_grf[r] = GL_TRUE;
}
/**
 * Mark the given GRF register as not in use.
 *
 * Releasing a register that is not currently marked as used is tolerated
 * (the check for that is intentionally not enforced).
 *
 * \param c  the WM compile state holding the used_grf[] table
 * \param r  index of the GRF register to release; must be a valid index
 *           into c->used_grf[]
 */
static void
release_grf(struct brw_wm_compile *c, int r)
{
   /* used_grf[] is a fixed-size table: catch a bad register number in
    * debug builds instead of silently writing outside the array.
    */
   assert(r >= 0 &&
          r < (int) (sizeof(c->used_grf) / sizeof(c->used_grf[0])));

   c->used_grf[r] = GL_FALSE;

   /* Pull the search hint down so the next allocation scan starts no
    * later than the register just freed.  first_free_grf is unsigned, so
    * make the signed/unsigned comparison explicit.
    */
   if ((unsigned) r < c->first_free_grf)
      c->first_free_grf = r;
}
/**
 * Scan used_grf[] upward, starting at c->first_free_grf, for an unused
 * register.  On success the register is marked as used and
 * c->first_free_grf is advanced just past it (only a hint for the next
 * scan).
 *
 * \return index of the register found, or -1 if the scan found none
 */
static int
scan_for_free_grf(struct brw_wm_compile *c)
{
   GLuint grf = c->first_free_grf;

   while (grf < BRW_WM_MAX_GRF) {
      if (!c->used_grf[grf]) {
         c->used_grf[grf] = GL_TRUE;
         c->first_free_grf = grf + 1;  /* a guess */
         return grf;
      }
      grf++;
   }

   return -1;
}


/**
 * Return index of a free GRF, mark it as used.
 *
 * If the first scan finds nothing, reclaim_temps() is called, the search
 * hint is reset to 0 and the table is scanned once more.
 *
 * \return index of the allocated register.  If both scans fail, the
 *         hard-coded register number 60 is returned; this function never
 *         returns a negative value.
 */
static int
alloc_grf(struct brw_wm_compile *c)
{
   GLuint i;
   int grf;

   grf = scan_for_free_grf(c);
   if (grf >= 0)
      return grf;

   /* no free temps, try to reclaim some */
   reclaim_temps(c);
   c->first_free_grf = 0;

   /* try alloc again, this time over the whole table */
   grf = scan_for_free_grf(c);
   if (grf >= 0)
      return grf;

   /* sanity check: at this point every register must be marked used */
   for (i = 0; i < BRW_WM_MAX_GRF; i++) {
      assert(c->used_grf[i]);
   }

   /* Really out of temp regs: fall back to a fixed register number. */
   return 60;
}
/**
 * Return number of GRF registers used.
 *
 * This is one more than the highest index marked in c->used_grf[], or 0
 * when no register is marked at all.
 */
static int
num_grf_used(const struct brw_wm_compile *c)
{
   int count = BRW_WM_MAX_GRF;

   /* shrink the count past any unused registers at the top of the table */
   while (count > 0 && !c->used_grf[count - 1])
      count--;

   return count;
}
/**
* Record the mapping of a Mesa register to a hardware register.
*/
@ -68,11 +140,18 @@ static int get_scalar_dst_index(const struct prog_instruction *inst)
static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
{
struct brw_reg reg;
if(c->tmp_index == c->tmp_max)
c->tmp_regs[ c->tmp_max++ ] = c->reg_index++;
/* if we need to allocate another temp, grow the tmp_regs[] array */
if (c->tmp_index == c->tmp_max) {
c->tmp_regs[ c->tmp_max++ ] = alloc_grf(c);
}
/* form the GRF register */
reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
/*printf("alloc_temp %d\n", reg.nr);*/
assert(reg.nr < BRW_WM_MAX_GRF);
return reg;
}
/**
@ -130,35 +209,26 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
return brw_null_reg();
}
assert(index < 256);
/* see if we've already allocated a HW register for this Mesa register */
if (c->wm_regs[file][index][component].inited) {
/* yes, re-use */
reg = c->wm_regs[file][index][component].reg;
/* yes, re-use */
reg = c->wm_regs[file][index][component].reg;
}
else {
/* no, allocate new register */
reg = brw_vec8_grf(c->reg_index, 0);
int grf = alloc_grf(c);
if (grf < 0) {
/* totally out of temps */
grf = 70; /* XXX !!!! */
}
reg = brw_vec8_grf(grf, 0);
/*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
set_reg(c, file, index, component, reg);
}
/* if this is a new register allocation, record it in the table */
if (!c->wm_regs[file][index][component].inited) {
set_reg(c, file, index, component, reg);
c->reg_index++;
}
if (c->reg_index >= BRW_WM_MAX_GRF - 12) {
/* ran out of temporary registers! */
#if 1
/* This is a big hack for now.
* Return bad register index, just don't hang the GPU.
*/
_mesa_fprintf(stderr, "out of regs %d\n", c->reg_index);
c->reg_index = BRW_WM_MAX_GRF - 13;
#else
return brw_null_reg();
#endif
}
if (neg & (1 << component)) {
reg = negate(reg);
}
@ -168,6 +238,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
}
/**
 * Called when the GRF allocator runs out of registers.  Computes the live
 * intervals of the program's temporaries and releases the hardware
 * registers of every temporary whose interval ended before the
 * instruction currently being emitted (c->cur_inst).
 *
 * For each such temporary, every component that has a recorded hardware
 * register is released and its wm_regs[] entry is marked uninitialized.
 */
static void
reclaim_temps(struct brw_wm_compile *c)
{
   GLint intBegin[MAX_PROGRAM_TEMPS];
   GLint intEnd[MAX_PROGRAM_TEMPS];
   int temp;

   _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns,
                             intBegin, intEnd);

   for (temp = 0; temp < MAX_PROGRAM_TEMPS; temp++) {
      int chan;

      /* Skip temporaries with no recorded interval end (-1; presumably
       * never referenced -- confirm against _mesa_find_temp_intervals)
       * and those still live at the current instruction.
       */
      if (intEnd[temp] == -1 || intEnd[temp] >= c->cur_inst)
         continue;

      /* this program temporary is dead: free each mapped component */
      for (chan = 0; chan < 4; chan++) {
         if (!c->wm_regs[PROGRAM_TEMPORARY][temp][chan].inited)
            continue;

         release_grf(c, c->wm_regs[PROGRAM_TEMPORARY][temp][chan].reg.nr);
         c->wm_regs[PROGRAM_TEMPORARY][temp][chan].inited = GL_FALSE;
      }
   }
}
/**
* Preallocate registers. This sets up the Mesa to hardware register
* mapping for certain registers, such as constants (uniforms/state vars)
@ -179,6 +289,10 @@ static void prealloc_reg(struct brw_wm_compile *c)
struct brw_reg reg;
int nr_interp_regs = 0;
GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
GLuint reg_index = 0;
memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
c->first_free_grf = 0;
for (i = 0; i < 4; i++) {
if (i < c->key.nr_depth_regs)
@ -187,14 +301,20 @@ static void prealloc_reg(struct brw_wm_compile *c)
reg = brw_vec8_grf(0, 0);
set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
}
c->reg_index += 2 * c->key.nr_depth_regs;
reg_index += 2 * c->key.nr_depth_regs;
/* constants */
{
const int nr_params = c->fp->program.Base.Parameters->NumParameters;
const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
/* use a real constant buffer, or just use a section of the GRF? */
c->fp->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/
/* XXX this heuristic may need adjustment... */
if ((nr_params + nr_temps) * 4 + reg_index > 80)
c->fp->use_const_buffer = GL_TRUE;
else
c->fp->use_const_buffer = GL_FALSE;
/*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
if (c->fp->use_const_buffer) {
/* We'll use a real constant buffer and fetch constants from
@ -216,7 +336,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
for (i = 0; i < nr_params; i++) {
/* loop over XYZW channels */
for (j = 0; j < 4; j++, index++) {
reg = brw_vec1_grf(c->reg_index + index / 8, index % 8);
reg = brw_vec1_grf(reg_index + index / 8, index % 8);
/* Save pointer to parameter/constant value.
* Constants will be copied in prepare_constant_buffer()
*/
@ -226,7 +346,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
}
/* number of constant regs used (each reg is float[8]) */
c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
c->reg_index += c->nr_creg;
reg_index += c->nr_creg;
}
}
@ -234,20 +354,24 @@ static void prealloc_reg(struct brw_wm_compile *c)
for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
if (inputs & (1<<i)) {
nr_interp_regs++;
reg = brw_vec8_grf(c->reg_index, 0);
reg = brw_vec8_grf(reg_index, 0);
for (j = 0; j < 4; j++)
set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
c->reg_index += 2;
reg_index += 2;
}
}
c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
c->prog_data.urb_read_length = nr_interp_regs * 2;
c->prog_data.curb_read_length = c->nr_creg;
c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
c->reg_index++;
c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
c->reg_index += 2;
c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
reg_index++;
c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
reg_index += 2;
/* mark GRF regs [0..reg_index-1] as in-use */
for (i = 0; i < reg_index; i++)
prealloc_grf(c, i);
/* An instruction may reference up to three constants.
* They'll be found in these registers.
@ -256,7 +380,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
if (c->fp->use_const_buffer) {
for (i = 0; i < 3; i++) {
c->current_const[i].index = -1;
c->current_const[i].reg = alloc_tmp(c);
c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
}
}
#if 0
@ -2595,7 +2719,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
struct brw_compile *p = &c->func;
struct brw_indirect stack_index = brw_indirect(0, 0);
c->reg_index = 0;
prealloc_reg(c);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
@ -2603,6 +2726,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
for (i = 0; i < c->nr_fp_insns; i++) {
const struct prog_instruction *inst = &c->prog_instructions[i];
c->cur_inst = i;
#if 0
_mesa_printf("Inst %d: ", i);
_mesa_print_instruction(inst);
@ -2833,17 +2958,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
_mesa_printf("unsupported IR in fragment shader %d\n",
inst->Opcode);
}
if (inst->CondUpdate)
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
else
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
post_wm_emit(c);
if (c->reg_index >= BRW_WM_MAX_GRF) {
_mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()");
/* XXX we need to do some proper error recovery here */
}
}
@ -2867,6 +2988,6 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
brw_wm_print_program(c, "brw_wm_glsl_emit done");
}
c->prog_data.total_grf = c->reg_index;
c->prog_data.total_grf = num_grf_used(c);
c->prog_data.total_scratch = 0;
}

View file

@ -268,7 +268,7 @@ brw_create_texture_surface( struct brw_context *brw,
surf.ss0.cube_neg_z = 1;
}
bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
key, sizeof(*key),
&key->bo, key->bo ? 1 : 0,
&surf, sizeof(surf),
@ -321,10 +321,11 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
key.tiling = intelObj->mt->region->tiling;
dri_bo_unreference(brw->wm.surf_bo[surf]);
brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
&key, sizeof(key),
&key.bo, key.bo ? 1 : 0,
NULL);
brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
BRW_SS_SURFACE,
&key, sizeof(key),
&key.bo, key.bo ? 1 : 0,
NULL);
if (brw->wm.surf_bo[surf] == NULL) {
brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key);
}
@ -362,7 +363,7 @@ brw_create_constant_surface( struct brw_context *brw,
surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
key, sizeof(*key),
&key->bo, key->bo ? 1 : 0,
&surf, sizeof(surf),
@ -427,7 +428,8 @@ brw_update_wm_constant_surface( GLcontext *ctx,
*/
dri_bo_unreference(brw->wm.surf_bo[surf]);
brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
BRW_SS_SURFACE,
&key, sizeof(key),
&key.bo, key.bo ? 1 : 0,
NULL);
@ -456,17 +458,14 @@ brw_update_vs_constant_surface( GLcontext *ctx,
assert(surf == 0);
/* free old const buffer if too small */
if (const_buffer && const_buffer->size < size) {
dri_bo_unreference(const_buffer);
const_buffer = NULL;
}
/* We always create a new VS constant buffer so that several can be
* in flight at a time. Free the old one first...
*/
dri_bo_unreference(const_buffer);
/* alloc new buffer if needed */
if (!const_buffer) {
const_buffer =
drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64);
}
/* alloc new buffer */
const_buffer =
drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64);
memset(&key, 0, sizeof(key));
@ -487,7 +486,8 @@ brw_update_vs_constant_surface( GLcontext *ctx,
*/
dri_bo_unreference(brw->vs.surf_bo[surf]);
brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
BRW_SS_SURFACE,
&key, sizeof(key),
&key.bo, key.bo ? 1 : 0,
NULL);
@ -569,10 +569,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
dri_bo_unreference(brw->wm.surf_bo[unit]);
brw->wm.surf_bo[unit] = NULL;
if (cached)
brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
&key, sizeof(key),
&region_bo, 1,
NULL);
brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
BRW_SS_SURFACE,
&key, sizeof(key),
&region_bo, 1,
NULL);
if (brw->wm.surf_bo[unit] == NULL) {
struct brw_surface_state surf;
@ -598,7 +599,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
surf.ss0.writedisable_alpha = !key.color_mask[3];
/* Key size will never match key size for textures, so we're safe. */
brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache,
BRW_SS_SURFACE,
&key, sizeof(key),
&region_bo, 1,
&surf, sizeof(surf),
@ -630,7 +632,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->wm.surf_bo, brw->wm.nr_surfaces,
NULL);
@ -646,7 +648,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
else
data[i] = 0;
bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->wm.surf_bo, brw->wm.nr_surfaces,
data, data_size,
@ -746,7 +748,7 @@ brw_vs_get_binding_table(struct brw_context *brw)
assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF);
bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->vs.surf_bo, brw->vs.nr_surfaces,
NULL);
@ -762,7 +764,7 @@ brw_vs_get_binding_table(struct brw_context *brw)
else
data[i] = 0;
bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
NULL, 0,
brw->vs.surf_bo, brw->vs.nr_surfaces,
data, data_size,
@ -787,8 +789,7 @@ brw_vs_get_binding_table(struct brw_context *brw)
/**
* Vertex shader surfaces. Just constant buffer for now. Could add vertex
* shader textures in the future.
* Vertex shader surfaces (constant buffer).
*/
static void prepare_vs_surfaces(struct brw_context *brw )
{
@ -824,8 +825,12 @@ prepare_surfaces(struct brw_context *brw)
const struct brw_tracked_state brw_wm_surfaces = {
.dirty = {
.mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM,
.brw = BRW_NEW_CONTEXT,
.mesa = (_NEW_COLOR |
_NEW_TEXTURE |
_NEW_BUFFERS |
_NEW_PROGRAM |
_NEW_PROGRAM_CONSTANTS),
.brw = (BRW_NEW_CONTEXT),
.cache = 0
},
.prepare = prepare_surfaces,

View file

@ -2484,7 +2484,7 @@ void r200ValidateState( GLcontext *ctx )
r200UpdateDrawBuffer(ctx);
}
if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) {
if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) {
r200UpdateTextureState( ctx );
new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
r200UpdateLocalViewer( ctx );
@ -2523,6 +2523,7 @@ void r200ValidateState( GLcontext *ctx )
}
if (new_state & (_NEW_PROGRAM|
_NEW_PROGRAM_CONSTANTS |
/* need to test for pretty much anything due to possible parameter bindings */
_NEW_MODELVIEW|_NEW_PROJECTION|_NEW_TRANSFORM|
_NEW_LIGHT|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX|

View file

@ -470,7 +470,8 @@ void r300TranslateFragmentShader(r300ContextPtr r300,
fp->translated = GL_TRUE;
if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
r300FragmentProgramDump(fp, &fp->code);
r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM |
_NEW_PROGRAM_CONSTANTS);
}
update_params(r300, fp);

View file

@ -1109,7 +1109,7 @@ void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
struct gl_program_parameter_list *paramList;
GLuint i;
if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM)))
if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)))
return;
fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current;
@ -2357,11 +2357,12 @@ void r300UpdateShaders(r300ContextPtr rmesa)
hw_tcl_on = future_hw_tcl_on = 0;
r300ResetHwState(rmesa);
r300UpdateStateParameters(ctx, _NEW_PROGRAM);
r300UpdateStateParameters(ctx, _NEW_PROGRAM |
_NEW_PROGRAM_CONSTANTS);
return;
}
}
r300UpdateStateParameters(ctx, _NEW_PROGRAM);
r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
}
static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx,

View file

@ -501,7 +501,8 @@ void r500TranslateFragmentShader(r300ContextPtr r300,
_mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0);
r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM |
_NEW_PROGRAM_CONSTANTS);
if (RADEON_DEBUG & DEBUG_PIXEL) {
if (fp->translated) {

View file

@ -74,8 +74,6 @@ _mesa_BindProgram(GLenum target, GLuint id)
GET_CURRENT_CONTEXT(ctx);
ASSERT_OUTSIDE_BEGIN_END(ctx);
FLUSH_VERTICES(ctx, _NEW_PROGRAM);
/* Error-check target and get curProg */
if ((target == GL_VERTEX_PROGRAM_ARB) && /* == GL_VERTEX_PROGRAM_NV */
(ctx->Extensions.NV_vertex_program ||
@ -132,6 +130,9 @@ _mesa_BindProgram(GLenum target, GLuint id)
return;
}
/* signal new program (and its new constants) */
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
/* bind newProg */
if (target == GL_VERTEX_PROGRAM_ARB) { /* == GL_VERTEX_PROGRAM_NV */
_mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current,
@ -489,7 +490,7 @@ _mesa_ProgramEnvParameter4fARB(GLenum target, GLuint index,
GET_CURRENT_CONTEXT(ctx);
ASSERT_OUTSIDE_BEGIN_END(ctx);
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
if (target == GL_FRAGMENT_PROGRAM_ARB
&& ctx->Extensions.ARB_fragment_program) {
@ -537,7 +538,7 @@ _mesa_ProgramEnvParameters4fvEXT(GLenum target, GLuint index, GLsizei count,
GLfloat * dest;
ASSERT_OUTSIDE_BEGIN_END(ctx);
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
if (count <= 0) {
_mesa_error(ctx, GL_INVALID_VALUE, "glProgramEnvParameters4fv(count)");
@ -631,7 +632,7 @@ _mesa_ProgramLocalParameter4fARB(GLenum target, GLuint index,
struct gl_program *prog;
ASSERT_OUTSIDE_BEGIN_END(ctx);
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
if ((target == GL_FRAGMENT_PROGRAM_NV
&& ctx->Extensions.NV_fragment_program) ||
@ -685,7 +686,7 @@ _mesa_ProgramLocalParameters4fvEXT(GLenum target, GLuint index, GLsizei count,
GLint i;
ASSERT_OUTSIDE_BEGIN_END(ctx);
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
if (count <= 0) {
_mesa_error(ctx, GL_INVALID_VALUE, "glProgramLocalParameters4fv(count)");

View file

@ -706,7 +706,7 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name,
GET_CURRENT_CONTEXT(ctx);
ASSERT_OUTSIDE_BEGIN_END(ctx);
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
prog = _mesa_lookup_program(ctx, id);
if (!prog || prog->Target != GL_FRAGMENT_PROGRAM_NV) {

View file

@ -547,15 +547,13 @@ update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic)
/**
* Find the live intervals for each temporary register in the program.
* For register R, the interval [A,B] indicates that R is referenced
* from instruction A through instruction B.
* Special consideration is needed for loops and subroutines.
* \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
* Find first/last instruction that references each temporary register.
*/
static GLboolean
find_live_intervals(struct gl_program *prog,
struct interval_list *liveIntervals)
GLboolean
_mesa_find_temp_intervals(const struct prog_instruction *instructions,
GLuint numInstructions,
GLint intBegin[MAX_PROGRAM_TEMPS],
GLint intEnd[MAX_PROGRAM_TEMPS])
{
struct loop_info
{
@ -563,26 +561,15 @@ find_live_intervals(struct gl_program *prog,
};
struct loop_info loopStack[MAX_LOOP_NESTING];
GLuint loopStackDepth = 0;
GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS];
GLuint i;
/*
* Note: we'll return GL_FALSE below if we find relative indexing
* into the TEMP register file. We can't handle that yet.
* We also give up on subroutines for now.
*/
if (dbg) {
_mesa_printf("Optimize: Begin find intervals\n");
}
for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
intBegin[i] = intEnd[i] = -1;
}
/* Scan instructions looking for temporary registers */
for (i = 0; i < prog->NumInstructions; i++) {
const struct prog_instruction *inst = prog->Instructions + i;
for (i = 0; i < numInstructions; i++) {
const struct prog_instruction *inst = instructions + i;
if (inst->Opcode == OPCODE_BGNLOOP) {
loopStack[loopStackDepth].Start = i;
loopStack[loopStackDepth].End = inst->BranchTarget;
@ -595,7 +582,7 @@ find_live_intervals(struct gl_program *prog,
return GL_FALSE;
}
else {
const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
GLuint j;
for (j = 0; j < numSrc; j++) {
if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
@ -624,6 +611,39 @@ find_live_intervals(struct gl_program *prog,
}
}
return GL_TRUE;
}
/**
* Find the live intervals for each temporary register in the program.
* For register R, the interval [A,B] indicates that R is referenced
* from instruction A through instruction B.
* Special consideration is needed for loops and subroutines.
* \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
*/
static GLboolean
find_live_intervals(struct gl_program *prog,
struct interval_list *liveIntervals)
{
GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS];
GLuint i;
/*
* Note: we'll return GL_FALSE below if we find relative indexing
* into the TEMP register file. We can't handle that yet.
* We also give up on subroutines for now.
*/
if (dbg) {
_mesa_printf("Optimize: Begin find intervals\n");
}
/* build intermediate arrays */
if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
intBegin, intEnd))
return GL_FALSE;
/* Build live intervals list from intermediate arrays */
liveIntervals->Num = 0;
for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
@ -794,6 +814,96 @@ _mesa_reallocate_registers(struct gl_program *prog)
#if 0
/*
 * NOTE(review): everything up to the matching #endif is compiled out.
 * It is an unfinished copy of the temporary-register interval scan and
 * cannot simply be re-enabled as written:
 *  - 'i' is declared twice in the same scope,
 *  - the function is declared 'static void' but executes 'return GL_FALSE',
 *  - the scan fills the local intBegin[]/intEnd[] arrays, so the
 *    firstInst[]/lastInst[] parameters are only ever set to -1,
 *  - the function's opening brace is never closed before the #endif.
 */
static void
_mesa_find_temporary_live_intervals(struct gl_program *prog,
GLint firstInst[MAX_PROGRAM_TEMPS],
GLint lastInst[MAX_PROGRAM_TEMPS])
{
GLuint i;
for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
firstInst[i] = lastInst[i] = -1;
}
struct loop_info loopStack[MAX_LOOP_NESTING];
GLuint loopStackDepth = 0;
GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS];
GLuint i;
/*
* Note: we'll return GL_FALSE below if we find relative indexing
* into the TEMP register file. We can't handle that yet.
* We also give up on subroutines for now.
*/
if (dbg) {
_mesa_printf("Optimize: Begin find intervals\n");
}
for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
intBegin[i] = intEnd[i] = -1;
}
/* Scan instructions looking for temporary registers */
for (i = 0; i < prog->NumInstructions; i++) {
const struct prog_instruction *inst = prog->Instructions + i;
if (inst->Opcode == OPCODE_BGNLOOP) {
loopStack[loopStackDepth].Start = i;
loopStack[loopStackDepth].End = inst->BranchTarget;
loopStackDepth++;
}
else if (inst->Opcode == OPCODE_ENDLOOP) {
loopStackDepth--;
}
else if (inst->Opcode == OPCODE_CAL) {
return GL_FALSE;
}
else {
const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
GLuint j;
for (j = 0; j < numSrc; j++) {
if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
const GLuint index = inst->SrcReg[j].Index;
if (inst->SrcReg[j].RelAddr)
return GL_FALSE;
update_interval(intBegin, intEnd, index, i);
if (loopStackDepth > 0) {
/* extend temp register's interval to end of loop */
GLuint loopEnd = loopStack[loopStackDepth - 1].End;
update_interval(intBegin, intEnd, index, loopEnd);
}
}
}
if (inst->DstReg.File == PROGRAM_TEMPORARY) {
const GLuint index = inst->DstReg.Index;
if (inst->DstReg.RelAddr)
return GL_FALSE;
update_interval(intBegin, intEnd, index, i);
if (loopStackDepth > 0) {
/* extend temp register's interval to end of loop */
GLuint loopEnd = loopStack[loopStackDepth - 1].End;
update_interval(intBegin, intEnd, index, loopEnd);
}
}
}
}
#endif
/**
* Apply optimizations to the given program to eliminate unnecessary
* instructions, temp regs, etc.

View file

@ -25,7 +25,19 @@
#ifndef PROG_OPT_H
#define PROG_OPT_H
#include "main/config.h"
struct gl_program;
struct prog_instruction;
/**
 * Find the first/last instruction that references each temporary register.
 *
 * \param instructions     array of instructions to scan
 * \param numInstructions  number of entries in \p instructions
 * \param intBegin         per-temporary result array; the implementation
 *                         resets every entry to -1 before scanning
 * \param intEnd           per-temporary result array; the implementation
 *                         resets every entry to -1 before scanning
 * \return GL_TRUE if the scan completed, GL_FALSE if it gave up.
 *         NOTE(review): per the note in find_live_intervals() the give-up
 *         cases are relative indexing into the TEMP register file and
 *         subroutine calls -- confirm against the full definition.
 */
extern GLboolean
_mesa_find_temp_intervals(const struct prog_instruction *instructions,
GLuint numInstructions,
GLint intBegin[MAX_PROGRAM_TEMPS],
GLint intEnd[MAX_PROGRAM_TEMPS]);
/**
 * Optimization entry point for a gl_program.
 * NOTE(review): presumably the function whose definition is documented as
 * "Apply optimizations to the given program to eliminate unnecessary
 * instructions, temp regs, etc." -- the definition is not visible here,
 * so confirm before relying on this.
 */
extern void
_mesa_optimize_program(GLcontext *ctx, struct gl_program *program);

View file

@ -1487,7 +1487,7 @@ _mesa_use_program(GLcontext *ctx, GLuint program)
return;
}
FLUSH_VERTICES(ctx, _NEW_PROGRAM);
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
if (program) {
shProg = _mesa_lookup_shader_program_err(ctx, program, "glUseProgram");
@ -1789,7 +1789,7 @@ _mesa_uniform(GLcontext *ctx, GLint location, GLsizei count,
return;
}
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
uniform = &shProg->Uniforms->Uniforms[location];
@ -1929,7 +1929,7 @@ _mesa_uniform_matrix(GLcontext *ctx, GLint cols, GLint rows,
return;
}
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
uniform = &shProg->Uniforms->Uniforms[location];