mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 15:38:09 +02:00
Merge branch 'const-buffer-changes'
Conflicts: src/mesa/drivers/dri/i965/brw_curbe.c src/mesa/drivers/dri/i965/brw_vs_emit.c src/mesa/drivers/dri/i965/brw_wm_glsl.c
This commit is contained in:
commit
b9196c1fa3
19 changed files with 507 additions and 234 deletions
|
|
@ -73,7 +73,7 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state)
|
|||
p->params_uptodate = 0;
|
||||
}
|
||||
|
||||
if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM))
|
||||
if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))
|
||||
i915_update_fog(ctx);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -245,6 +245,9 @@ struct brw_vs_ouput_sizes {
|
|||
};
|
||||
|
||||
|
||||
/** Number of general purpose registers (VS, WM, etc) */
|
||||
#define BRW_MAX_GRF 128
|
||||
|
||||
/** Number of texture sampler units */
|
||||
#define BRW_MAX_TEX_UNIT 16
|
||||
|
||||
|
|
@ -450,8 +453,6 @@ struct brw_context
|
|||
|
||||
struct {
|
||||
struct brw_state_flags dirty;
|
||||
struct brw_tracked_state **atoms;
|
||||
GLuint nr_atoms;
|
||||
|
||||
GLuint nr_color_regions;
|
||||
struct intel_region *color_regions[MAX_DRAW_BUFFERS];
|
||||
|
|
@ -471,7 +472,8 @@ struct brw_context
|
|||
int validated_bo_count;
|
||||
} state;
|
||||
|
||||
struct brw_cache cache;
|
||||
struct brw_cache cache; /** non-surface items */
|
||||
struct brw_cache surface_cache; /* surface items */
|
||||
struct brw_cached_batch_item *cached_batch_items;
|
||||
|
||||
struct {
|
||||
|
|
@ -555,11 +557,6 @@ struct brw_context
|
|||
GLuint vs_size;
|
||||
GLuint total_size;
|
||||
|
||||
/* Dynamic tracker which changes to reflect the state referenced
|
||||
* by active fp and vp program parameters:
|
||||
*/
|
||||
struct brw_tracked_state tracked_state;
|
||||
|
||||
dri_bo *curbe_bo;
|
||||
/** Offset within curbe_bo of space for current curbe entry */
|
||||
GLuint curbe_offset;
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@
|
|||
#include "main/macros.h"
|
||||
#include "main/enums.h"
|
||||
#include "shader/prog_parameter.h"
|
||||
#include "shader/prog_print.h"
|
||||
#include "shader/prog_statevars.h"
|
||||
#include "intel_batchbuffer.h"
|
||||
#include "intel_regions.h"
|
||||
|
|
@ -188,13 +189,6 @@ static void prepare_constant_buffer(struct brw_context *brw)
|
|||
GLfloat *buf;
|
||||
GLuint i;
|
||||
|
||||
/* Update our own dependency flags. This works because this
|
||||
* function will also be called whenever fp or vp changes.
|
||||
*/
|
||||
brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION);
|
||||
brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags;
|
||||
brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags;
|
||||
|
||||
if (sz == 0) {
|
||||
if (brw->curbe.last_buf) {
|
||||
free(brw->curbe.last_buf);
|
||||
|
|
@ -363,11 +357,7 @@ update_constant_buffer(struct brw_context *brw,
|
|||
}
|
||||
|
||||
if (0) {
|
||||
int i;
|
||||
for (i = 0; i < params->NumParameters; i++) {
|
||||
float *p = params->ParameterValues[i];
|
||||
printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]);
|
||||
}
|
||||
_mesa_print_parameter_list(params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -380,7 +370,7 @@ update_vertex_constant_buffer(struct brw_context *brw)
|
|||
struct brw_vertex_program *vp =
|
||||
(struct brw_vertex_program *) brw->vertex_program;
|
||||
if (0) {
|
||||
printf("update VS constants in buffer %p\n", vp->const_buffer);
|
||||
printf("update VS constants in buffer %p vp = %p\n", vp->const_buffer, vp);
|
||||
printf("program %u\n", vp->program.Base.Id);
|
||||
}
|
||||
if (vp->use_const_buffer)
|
||||
|
|
@ -394,6 +384,10 @@ update_fragment_constant_buffer(struct brw_context *brw)
|
|||
{
|
||||
struct brw_fragment_program *fp =
|
||||
(struct brw_fragment_program *) brw->fragment_program;
|
||||
if (0) {
|
||||
printf("update WM constants in buffer %p\n", fp->const_buffer);
|
||||
printf("program %u\n", fp->program.Base.Id);
|
||||
}
|
||||
if (fp->use_const_buffer)
|
||||
update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer);
|
||||
}
|
||||
|
|
@ -428,7 +422,7 @@ static void emit_constant_buffer(struct brw_context *brw)
|
|||
*/
|
||||
const struct brw_tracked_state brw_constant_buffer = {
|
||||
.dirty = {
|
||||
.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */
|
||||
.mesa = _NEW_PROGRAM_CONSTANTS,
|
||||
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
|
||||
BRW_NEW_VERTEX_PROGRAM |
|
||||
BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
|
||||
|
|
|
|||
|
|
@ -135,8 +135,8 @@ dri_bo *brw_search_cache( struct brw_cache *cache,
|
|||
void *aux_return);
|
||||
void brw_state_cache_check_size( struct brw_context *brw );
|
||||
|
||||
void brw_init_cache( struct brw_context *brw );
|
||||
void brw_destroy_cache( struct brw_context *brw );
|
||||
void brw_init_caches( struct brw_context *brw );
|
||||
void brw_destroy_caches( struct brw_context *brw );
|
||||
|
||||
/***********************************************************************
|
||||
* brw_state_batch.c
|
||||
|
|
|
|||
|
|
@ -56,9 +56,9 @@
|
|||
* incorrect program is run for the other instance.
|
||||
*/
|
||||
|
||||
#include "main/imports.h"
|
||||
#include "brw_state.h"
|
||||
#include "intel_batchbuffer.h"
|
||||
#include "main/imports.h"
|
||||
|
||||
/* XXX: Fixme - have to include these to get the sizes of the prog_key
|
||||
* structs:
|
||||
|
|
@ -69,8 +69,10 @@
|
|||
#include "brw_sf.h"
|
||||
#include "brw_gs.h"
|
||||
|
||||
static GLuint hash_key( const void *key, GLuint key_size,
|
||||
dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
|
||||
|
||||
static GLuint
|
||||
hash_key(const void *key, GLuint key_size,
|
||||
dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
|
||||
{
|
||||
GLuint *ikey = (GLuint *)key;
|
||||
GLuint hash = 0, i;
|
||||
|
|
@ -95,6 +97,7 @@ static GLuint hash_key( const void *key, GLuint key_size,
|
|||
return hash;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Marks a new buffer as being chosen for the given cache id.
|
||||
*/
|
||||
|
|
@ -111,6 +114,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
|
|||
cache->brw->state.dirty.cache |= 1 << cache_id;
|
||||
}
|
||||
|
||||
|
||||
static struct brw_cache_item *
|
||||
search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
|
||||
GLuint hash, const void *key, GLuint key_size,
|
||||
|
|
@ -143,7 +147,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
|
|||
}
|
||||
|
||||
|
||||
static void rehash( struct brw_cache *cache )
|
||||
static void
|
||||
rehash(struct brw_cache *cache)
|
||||
{
|
||||
struct brw_cache_item **items;
|
||||
struct brw_cache_item *c, *next;
|
||||
|
|
@ -164,15 +169,17 @@ static void rehash( struct brw_cache *cache )
|
|||
cache->size = size;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the buffer object matching cache_id and key, or NULL.
|
||||
*/
|
||||
dri_bo *brw_search_cache( struct brw_cache *cache,
|
||||
enum brw_cache_id cache_id,
|
||||
const void *key,
|
||||
GLuint key_size,
|
||||
dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
|
||||
void *aux_return )
|
||||
dri_bo *
|
||||
brw_search_cache(struct brw_cache *cache,
|
||||
enum brw_cache_id cache_id,
|
||||
const void *key,
|
||||
GLuint key_size,
|
||||
dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
|
||||
void *aux_return)
|
||||
{
|
||||
struct brw_cache_item *item;
|
||||
GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
|
||||
|
|
@ -192,6 +199,7 @@ dri_bo *brw_search_cache( struct brw_cache *cache,
|
|||
return item->bo;
|
||||
}
|
||||
|
||||
|
||||
dri_bo *
|
||||
brw_upload_cache( struct brw_cache *cache,
|
||||
enum brw_cache_id cache_id,
|
||||
|
|
@ -265,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache,
|
|||
return bo;
|
||||
}
|
||||
|
||||
/* This doesn't really work with aux data. Use search/upload instead
|
||||
|
||||
/**
|
||||
* This doesn't really work with aux data. Use search/upload instead
|
||||
*/
|
||||
dri_bo *
|
||||
brw_cache_data_sz(struct brw_cache *cache,
|
||||
|
|
@ -296,6 +306,7 @@ brw_cache_data_sz(struct brw_cache *cache,
|
|||
return bo;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
|
||||
*
|
||||
|
|
@ -319,21 +330,22 @@ enum pool_type {
|
|||
DW_GENERAL_STATE
|
||||
};
|
||||
|
||||
static void
|
||||
brw_init_cache_id( struct brw_context *brw,
|
||||
const char *name,
|
||||
enum brw_cache_id id,
|
||||
GLuint key_size,
|
||||
GLuint aux_size)
|
||||
{
|
||||
struct brw_cache *cache = &brw->cache;
|
||||
|
||||
static void
|
||||
brw_init_cache_id(struct brw_cache *cache,
|
||||
const char *name,
|
||||
enum brw_cache_id id,
|
||||
GLuint key_size,
|
||||
GLuint aux_size)
|
||||
{
|
||||
cache->name[id] = strdup(name);
|
||||
cache->key_size[id] = key_size;
|
||||
cache->aux_size[id] = aux_size;
|
||||
}
|
||||
|
||||
void brw_init_cache( struct brw_context *brw )
|
||||
|
||||
static void
|
||||
brw_init_non_surface_cache(struct brw_context *brw)
|
||||
{
|
||||
struct brw_cache *cache = &brw->cache;
|
||||
|
||||
|
|
@ -342,114 +354,136 @@ void brw_init_cache( struct brw_context *brw )
|
|||
cache->size = 7;
|
||||
cache->n_items = 0;
|
||||
cache->items = (struct brw_cache_item **)
|
||||
_mesa_calloc(cache->size *
|
||||
sizeof(struct brw_cache_item));
|
||||
_mesa_calloc(cache->size * sizeof(struct brw_cache_item));
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
brw_init_cache_id(cache,
|
||||
"CC_VP",
|
||||
BRW_CC_VP,
|
||||
sizeof(struct brw_cc_viewport),
|
||||
0);
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
brw_init_cache_id(cache,
|
||||
"CC_UNIT",
|
||||
BRW_CC_UNIT,
|
||||
sizeof(struct brw_cc_unit_state),
|
||||
0);
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
brw_init_cache_id(cache,
|
||||
"WM_PROG",
|
||||
BRW_WM_PROG,
|
||||
sizeof(struct brw_wm_prog_key),
|
||||
sizeof(struct brw_wm_prog_data));
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
brw_init_cache_id(cache,
|
||||
"SAMPLER_DEFAULT_COLOR",
|
||||
BRW_SAMPLER_DEFAULT_COLOR,
|
||||
sizeof(struct brw_sampler_default_color),
|
||||
0);
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
brw_init_cache_id(cache,
|
||||
"SAMPLER",
|
||||
BRW_SAMPLER,
|
||||
0, /* variable key/data size */
|
||||
0);
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
brw_init_cache_id(cache,
|
||||
"WM_UNIT",
|
||||
BRW_WM_UNIT,
|
||||
sizeof(struct brw_wm_unit_state),
|
||||
0);
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
brw_init_cache_id(cache,
|
||||
"SF_PROG",
|
||||
BRW_SF_PROG,
|
||||
sizeof(struct brw_sf_prog_key),
|
||||
sizeof(struct brw_sf_prog_data));
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
brw_init_cache_id(cache,
|
||||
"SF_VP",
|
||||
BRW_SF_VP,
|
||||
sizeof(struct brw_sf_viewport),
|
||||
0);
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
brw_init_cache_id(cache,
|
||||
"SF_UNIT",
|
||||
BRW_SF_UNIT,
|
||||
sizeof(struct brw_sf_unit_state),
|
||||
0);
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
brw_init_cache_id(cache,
|
||||
"VS_UNIT",
|
||||
BRW_VS_UNIT,
|
||||
sizeof(struct brw_vs_unit_state),
|
||||
0);
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
brw_init_cache_id(cache,
|
||||
"VS_PROG",
|
||||
BRW_VS_PROG,
|
||||
sizeof(struct brw_vs_prog_key),
|
||||
sizeof(struct brw_vs_prog_data));
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
brw_init_cache_id(cache,
|
||||
"CLIP_UNIT",
|
||||
BRW_CLIP_UNIT,
|
||||
sizeof(struct brw_clip_unit_state),
|
||||
0);
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
brw_init_cache_id(cache,
|
||||
"CLIP_PROG",
|
||||
BRW_CLIP_PROG,
|
||||
sizeof(struct brw_clip_prog_key),
|
||||
sizeof(struct brw_clip_prog_data));
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
brw_init_cache_id(cache,
|
||||
"GS_UNIT",
|
||||
BRW_GS_UNIT,
|
||||
sizeof(struct brw_gs_unit_state),
|
||||
0);
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
brw_init_cache_id(cache,
|
||||
"GS_PROG",
|
||||
BRW_GS_PROG,
|
||||
sizeof(struct brw_gs_prog_key),
|
||||
sizeof(struct brw_gs_prog_data));
|
||||
}
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
|
||||
static void
|
||||
brw_init_surface_cache(struct brw_context *brw)
|
||||
{
|
||||
struct brw_cache *cache = &brw->surface_cache;
|
||||
|
||||
cache->brw = brw;
|
||||
|
||||
cache->size = 7;
|
||||
cache->n_items = 0;
|
||||
cache->items = (struct brw_cache_item **)
|
||||
_mesa_calloc(cache->size * sizeof(struct brw_cache_item));
|
||||
|
||||
brw_init_cache_id(cache,
|
||||
"SS_SURFACE",
|
||||
BRW_SS_SURFACE,
|
||||
sizeof(struct brw_surface_state),
|
||||
0);
|
||||
|
||||
brw_init_cache_id(brw,
|
||||
brw_init_cache_id(cache,
|
||||
"SS_SURF_BIND",
|
||||
BRW_SS_SURF_BIND,
|
||||
0,
|
||||
0);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Initialize both state caches kept in the context: brw->cache (the
 * non-surface items) and brw->surface_cache (the surface items).
 */
void
brw_init_caches(struct brw_context *brw)
{
   /* non-surface state objects */
   brw_init_non_surface_cache(brw);

   /* surface state and binding tables */
   brw_init_surface_cache(brw);
}
|
||||
|
||||
|
||||
static void
|
||||
brw_clear_cache( struct brw_context *brw )
|
||||
brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
|
||||
{
|
||||
struct brw_cache_item *c, *next;
|
||||
GLuint i;
|
||||
|
|
@ -457,8 +491,8 @@ brw_clear_cache( struct brw_context *brw )
|
|||
if (INTEL_DEBUG & DEBUG_STATE)
|
||||
_mesa_printf("%s\n", __FUNCTION__);
|
||||
|
||||
for (i = 0; i < brw->cache.size; i++) {
|
||||
for (c = brw->cache.items[i]; c; c = next) {
|
||||
for (i = 0; i < cache->size; i++) {
|
||||
for (c = cache->items[i]; c; c = next) {
|
||||
int j;
|
||||
|
||||
next = c->next;
|
||||
|
|
@ -468,10 +502,10 @@ brw_clear_cache( struct brw_context *brw )
|
|||
free((void *)c->key);
|
||||
free(c);
|
||||
}
|
||||
brw->cache.items[i] = NULL;
|
||||
cache->items[i] = NULL;
|
||||
}
|
||||
|
||||
brw->cache.n_items = 0;
|
||||
cache->n_items = 0;
|
||||
|
||||
if (brw->curbe.last_buf) {
|
||||
_mesa_free(brw->curbe.last_buf);
|
||||
|
|
@ -483,25 +517,40 @@ brw_clear_cache( struct brw_context *brw )
|
|||
brw->state.dirty.cache |= ~0;
|
||||
}
|
||||
|
||||
void brw_state_cache_check_size( struct brw_context *brw )
|
||||
|
||||
void
|
||||
brw_state_cache_check_size(struct brw_context *brw)
|
||||
{
|
||||
/* un-tuned guess. We've got around 20 state objects for a total of around
|
||||
* 32k, so 1000 of them is around 1.5MB.
|
||||
*/
|
||||
if (brw->cache.n_items > 1000)
|
||||
brw_clear_cache(brw);
|
||||
brw_clear_cache(brw, &brw->cache);
|
||||
|
||||
if (brw->surface_cache.n_items > 1000)
|
||||
brw_clear_cache(brw, &brw->surface_cache);
|
||||
}
|
||||
|
||||
void brw_destroy_cache( struct brw_context *brw )
|
||||
|
||||
static void
|
||||
brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
|
||||
{
|
||||
GLuint i;
|
||||
|
||||
brw_clear_cache(brw);
|
||||
brw_clear_cache(brw, cache);
|
||||
for (i = 0; i < BRW_MAX_CACHE; i++) {
|
||||
dri_bo_unreference(brw->cache.last_bo[i]);
|
||||
free(brw->cache.name[i]);
|
||||
dri_bo_unreference(cache->last_bo[i]);
|
||||
free(cache->name[i]);
|
||||
}
|
||||
free(brw->cache.items);
|
||||
brw->cache.items = NULL;
|
||||
brw->cache.size = 0;
|
||||
free(cache->items);
|
||||
cache->items = NULL;
|
||||
cache->size = 0;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
brw_destroy_caches(struct brw_context *brw)
|
||||
{
|
||||
brw_destroy_cache(brw, &brw->cache);
|
||||
brw_destroy_cache(brw, &brw->surface_cache);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -59,7 +59,6 @@ const struct brw_tracked_state *atoms[] =
|
|||
&brw_curbe_offsets,
|
||||
&brw_recalculate_urb_fence,
|
||||
|
||||
|
||||
&brw_cc_vp,
|
||||
&brw_cc_unit,
|
||||
|
||||
|
|
@ -88,54 +87,26 @@ const struct brw_tracked_state *atoms[] =
|
|||
|
||||
&brw_line_stipple,
|
||||
&brw_aa_line_parameters,
|
||||
/* Ordering of the commands below is documented as fixed.
|
||||
*/
|
||||
#if 0
|
||||
&brw_pipelined_state_pointers,
|
||||
&brw_urb_fence,
|
||||
&brw_constant_buffer_state,
|
||||
#else
|
||||
|
||||
&brw_psp_urb_cbs,
|
||||
#endif
|
||||
|
||||
&brw_drawing_rect,
|
||||
&brw_indices,
|
||||
&brw_vertices,
|
||||
|
||||
NULL, /* brw_constant_buffer */
|
||||
&brw_constant_buffer
|
||||
};
|
||||
|
||||
|
||||
void brw_init_state( struct brw_context *brw )
|
||||
{
|
||||
GLuint i;
|
||||
|
||||
brw_init_cache(brw);
|
||||
|
||||
brw->state.atoms = _mesa_malloc(sizeof(atoms));
|
||||
brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms);
|
||||
_mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms));
|
||||
|
||||
/* Patch in a pointer to the dynamic state atom:
|
||||
*/
|
||||
for (i = 0; i < brw->state.nr_atoms; i++)
|
||||
if (brw->state.atoms[i] == NULL)
|
||||
brw->state.atoms[i] = &brw->curbe.tracked_state;
|
||||
|
||||
_mesa_memcpy(&brw->curbe.tracked_state,
|
||||
&brw_constant_buffer,
|
||||
sizeof(brw_constant_buffer));
|
||||
brw_init_caches(brw);
|
||||
}
|
||||
|
||||
|
||||
void brw_destroy_state( struct brw_context *brw )
|
||||
{
|
||||
if (brw->state.atoms) {
|
||||
_mesa_free(brw->state.atoms);
|
||||
brw->state.atoms = NULL;
|
||||
}
|
||||
|
||||
brw_destroy_cache(brw);
|
||||
brw_destroy_caches(brw);
|
||||
brw_destroy_batch_cache(brw);
|
||||
}
|
||||
|
||||
|
|
@ -218,6 +189,7 @@ static struct dirty_bit_map mesa_bits[] = {
|
|||
DEFINE_BIT(_NEW_MULTISAMPLE),
|
||||
DEFINE_BIT(_NEW_TRACK_MATRIX),
|
||||
DEFINE_BIT(_NEW_PROGRAM),
|
||||
DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
|
||||
{0, 0, 0}
|
||||
};
|
||||
|
||||
|
|
@ -336,7 +308,7 @@ void brw_validate_state( struct brw_context *brw )
|
|||
|
||||
/* do prepare stage for all atoms */
|
||||
for (i = 0; i < Elements(atoms); i++) {
|
||||
const struct brw_tracked_state *atom = brw->state.atoms[i];
|
||||
const struct brw_tracked_state *atom = atoms[i];
|
||||
|
||||
if (brw->intel.Fallback)
|
||||
break;
|
||||
|
|
@ -367,8 +339,8 @@ void brw_upload_state(struct brw_context *brw)
|
|||
_mesa_memset(&examined, 0, sizeof(examined));
|
||||
prev = *state;
|
||||
|
||||
for (i = 0; i < brw->state.nr_atoms; i++) {
|
||||
const struct brw_tracked_state *atom = brw->state.atoms[i];
|
||||
for (i = 0; i < Elements(atoms); i++) {
|
||||
const struct brw_tracked_state *atom = atoms[i];
|
||||
struct brw_state_flags generated;
|
||||
|
||||
assert(atom->dirty.mesa ||
|
||||
|
|
@ -397,7 +369,7 @@ void brw_upload_state(struct brw_context *brw)
|
|||
}
|
||||
else {
|
||||
for (i = 0; i < Elements(atoms); i++) {
|
||||
const struct brw_tracked_state *atom = brw->state.atoms[i];
|
||||
const struct brw_tracked_state *atom = atoms[i];
|
||||
|
||||
if (brw->intel.Fallback)
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -69,13 +69,18 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
|
|||
{
|
||||
GLuint i, reg = 0, mrf;
|
||||
|
||||
#if 0
|
||||
if (c->vp->program.Base.Parameters->NumParameters >= 6)
|
||||
c->vp->use_const_buffer = 1;
|
||||
/* Determine whether to use a real constant buffer or use a block
|
||||
* of GRF registers for constants. The latter is faster but only
|
||||
* works if everything fits in the GRF.
|
||||
* XXX this heuristic/check may need some fine tuning...
|
||||
*/
|
||||
if (c->vp->program.Base.Parameters->NumParameters +
|
||||
c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF)
|
||||
c->vp->use_const_buffer = GL_TRUE;
|
||||
else
|
||||
#endif
|
||||
c->vp->use_const_buffer = GL_FALSE;
|
||||
/*printf("use_const_buffer = %d\n", c->use_const_buffer);*/
|
||||
|
||||
/*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
|
||||
|
||||
/* r0 -- reserved as usual
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -240,15 +240,18 @@ struct brw_wm_compile {
|
|||
GLuint max_wm_grf;
|
||||
GLuint last_scratch;
|
||||
|
||||
GLuint cur_inst; /**< index of current instruction */
|
||||
|
||||
/** Mapping from Mesa registers to hardware registers */
|
||||
struct {
|
||||
GLboolean inited;
|
||||
struct brw_reg reg;
|
||||
} wm_regs[PROGRAM_PAYLOAD+1][256][4];
|
||||
|
||||
GLboolean used_grf[BRW_WM_MAX_GRF];
|
||||
GLuint first_free_grf;
|
||||
struct brw_reg stack;
|
||||
struct brw_reg emit_mask_reg;
|
||||
GLuint reg_index; /**< Index of next free GRF register */
|
||||
GLuint tmp_regs[BRW_WM_MAX_GRF];
|
||||
GLuint tmp_index;
|
||||
GLuint tmp_max;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
#include "main/macros.h"
|
||||
#include "shader/prog_parameter.h"
|
||||
#include "shader/prog_print.h"
|
||||
#include "shader/prog_optimize.h"
|
||||
#include "brw_context.h"
|
||||
#include "brw_eu.h"
|
||||
#include "brw_wm.h"
|
||||
|
|
@ -42,6 +44,76 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
|
|||
}
|
||||
|
||||
|
||||
|
||||
static void
|
||||
reclaim_temps(struct brw_wm_compile *c);
|
||||
|
||||
|
||||
/** Mark GRF register as used. */
|
||||
static void
|
||||
prealloc_grf(struct brw_wm_compile *c, int r)
|
||||
{
|
||||
c->used_grf[r] = GL_TRUE;
|
||||
}
|
||||
|
||||
|
||||
/** Mark given GRF register as not in use. */
|
||||
static void
|
||||
release_grf(struct brw_wm_compile *c, int r)
|
||||
{
|
||||
/*assert(c->used_grf[r]);*/
|
||||
c->used_grf[r] = GL_FALSE;
|
||||
c->first_free_grf = MIN2(c->first_free_grf, r);
|
||||
}
|
||||
|
||||
|
||||
/** Return index of a free GRF, mark it as used. */
|
||||
static int
|
||||
alloc_grf(struct brw_wm_compile *c)
|
||||
{
|
||||
GLuint r;
|
||||
for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
|
||||
if (!c->used_grf[r]) {
|
||||
c->used_grf[r] = GL_TRUE;
|
||||
c->first_free_grf = r + 1; /* a guess */
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
/* no free temps, try to reclaim some */
|
||||
reclaim_temps(c);
|
||||
c->first_free_grf = 0;
|
||||
|
||||
/* try alloc again */
|
||||
for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
|
||||
if (!c->used_grf[r]) {
|
||||
c->used_grf[r] = GL_TRUE;
|
||||
c->first_free_grf = r + 1; /* a guess */
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
for (r = 0; r < BRW_WM_MAX_GRF; r++) {
|
||||
assert(c->used_grf[r]);
|
||||
}
|
||||
/*printf("Really out of temp regs!\n");*/
|
||||
return 60;
|
||||
}
|
||||
|
||||
|
||||
/** Return number of GRF registers used */
|
||||
static int
|
||||
num_grf_used(const struct brw_wm_compile *c)
|
||||
{
|
||||
int r;
|
||||
for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--)
|
||||
if (c->used_grf[r])
|
||||
return r + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Record the mapping of a Mesa register to a hardware register.
|
||||
*/
|
||||
|
|
@ -68,11 +140,18 @@ static int get_scalar_dst_index(const struct prog_instruction *inst)
|
|||
static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
|
||||
{
|
||||
struct brw_reg reg;
|
||||
if(c->tmp_index == c->tmp_max)
|
||||
c->tmp_regs[ c->tmp_max++ ] = c->reg_index++;
|
||||
|
||||
|
||||
/* if we need to allocate another temp, grow the tmp_regs[] array */
|
||||
if (c->tmp_index == c->tmp_max) {
|
||||
c->tmp_regs[ c->tmp_max++ ] = alloc_grf(c);
|
||||
}
|
||||
|
||||
/* form the GRF register */
|
||||
reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0);
|
||||
/*printf("alloc_temp %d\n", reg.nr);*/
|
||||
assert(reg.nr < BRW_WM_MAX_GRF);
|
||||
return reg;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -130,35 +209,26 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
|
|||
return brw_null_reg();
|
||||
}
|
||||
|
||||
assert(index < 256);
|
||||
/* see if we've already allocated a HW register for this Mesa register */
|
||||
if (c->wm_regs[file][index][component].inited) {
|
||||
/* yes, re-use */
|
||||
reg = c->wm_regs[file][index][component].reg;
|
||||
/* yes, re-use */
|
||||
reg = c->wm_regs[file][index][component].reg;
|
||||
}
|
||||
else {
|
||||
/* no, allocate new register */
|
||||
reg = brw_vec8_grf(c->reg_index, 0);
|
||||
int grf = alloc_grf(c);
|
||||
if (grf < 0) {
|
||||
/* totally out of temps */
|
||||
grf = 70; /* XXX !!!! */
|
||||
}
|
||||
|
||||
reg = brw_vec8_grf(grf, 0);
|
||||
/*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
|
||||
|
||||
set_reg(c, file, index, component, reg);
|
||||
}
|
||||
|
||||
/* if this is a new register allocation, record it in the table */
|
||||
if (!c->wm_regs[file][index][component].inited) {
|
||||
set_reg(c, file, index, component, reg);
|
||||
c->reg_index++;
|
||||
}
|
||||
|
||||
if (c->reg_index >= BRW_WM_MAX_GRF - 12) {
|
||||
/* ran out of temporary registers! */
|
||||
#if 1
|
||||
/* This is a big hack for now.
|
||||
* Return bad register index, just don't hang the GPU.
|
||||
*/
|
||||
_mesa_fprintf(stderr, "out of regs %d\n", c->reg_index);
|
||||
c->reg_index = BRW_WM_MAX_GRF - 13;
|
||||
#else
|
||||
return brw_null_reg();
|
||||
#endif
|
||||
}
|
||||
|
||||
if (neg & (1 << component)) {
|
||||
reg = negate(reg);
|
||||
}
|
||||
|
|
@ -168,6 +238,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
|
|||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* This is called if we run out of GRF registers. Examine the live intervals
|
||||
* of temp regs in the program and free those which won't be used again.
|
||||
*/
|
||||
static void
|
||||
reclaim_temps(struct brw_wm_compile *c)
|
||||
{
|
||||
GLint intBegin[MAX_PROGRAM_TEMPS];
|
||||
GLint intEnd[MAX_PROGRAM_TEMPS];
|
||||
int index;
|
||||
|
||||
/*printf("Reclaim temps:\n");*/
|
||||
|
||||
_mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns,
|
||||
intBegin, intEnd);
|
||||
|
||||
for (index = 0; index < MAX_PROGRAM_TEMPS; index++) {
|
||||
if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) {
|
||||
/* program temp[i] can be freed */
|
||||
int component;
|
||||
/*printf(" temp[%d] is dead\n", index);*/
|
||||
for (component = 0; component < 4; component++) {
|
||||
if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) {
|
||||
int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr;
|
||||
release_grf(c, r);
|
||||
/*
|
||||
printf(" Reclaim temp %d, reg %d at inst %d\n",
|
||||
index, r, c->cur_inst);
|
||||
*/
|
||||
c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Preallocate registers. This sets up the Mesa to hardware register
|
||||
* mapping for certain registers, such as constants (uniforms/state vars)
|
||||
|
|
@ -179,6 +289,10 @@ static void prealloc_reg(struct brw_wm_compile *c)
|
|||
struct brw_reg reg;
|
||||
int nr_interp_regs = 0;
|
||||
GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
|
||||
GLuint reg_index = 0;
|
||||
|
||||
memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
|
||||
c->first_free_grf = 0;
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (i < c->key.nr_depth_regs)
|
||||
|
|
@ -187,14 +301,20 @@ static void prealloc_reg(struct brw_wm_compile *c)
|
|||
reg = brw_vec8_grf(0, 0);
|
||||
set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
|
||||
}
|
||||
c->reg_index += 2 * c->key.nr_depth_regs;
|
||||
reg_index += 2 * c->key.nr_depth_regs;
|
||||
|
||||
/* constants */
|
||||
{
|
||||
const int nr_params = c->fp->program.Base.Parameters->NumParameters;
|
||||
const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
|
||||
const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
|
||||
|
||||
/* use a real constant buffer, or just use a section of the GRF? */
|
||||
c->fp->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/
|
||||
/* XXX this heuristic may need adjustment... */
|
||||
if ((nr_params + nr_temps) * 4 + reg_index > 80)
|
||||
c->fp->use_const_buffer = GL_TRUE;
|
||||
else
|
||||
c->fp->use_const_buffer = GL_FALSE;
|
||||
/*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
|
||||
|
||||
if (c->fp->use_const_buffer) {
|
||||
/* We'll use a real constant buffer and fetch constants from
|
||||
|
|
@ -216,7 +336,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
|
|||
for (i = 0; i < nr_params; i++) {
|
||||
/* loop over XYZW channels */
|
||||
for (j = 0; j < 4; j++, index++) {
|
||||
reg = brw_vec1_grf(c->reg_index + index / 8, index % 8);
|
||||
reg = brw_vec1_grf(reg_index + index / 8, index % 8);
|
||||
/* Save pointer to parameter/constant value.
|
||||
* Constants will be copied in prepare_constant_buffer()
|
||||
*/
|
||||
|
|
@ -226,7 +346,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
|
|||
}
|
||||
/* number of constant regs used (each reg is float[8]) */
|
||||
c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
|
||||
c->reg_index += c->nr_creg;
|
||||
reg_index += c->nr_creg;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -234,20 +354,24 @@ static void prealloc_reg(struct brw_wm_compile *c)
|
|||
for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
|
||||
if (inputs & (1<<i)) {
|
||||
nr_interp_regs++;
|
||||
reg = brw_vec8_grf(c->reg_index, 0);
|
||||
reg = brw_vec8_grf(reg_index, 0);
|
||||
for (j = 0; j < 4; j++)
|
||||
set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
|
||||
c->reg_index += 2;
|
||||
reg_index += 2;
|
||||
}
|
||||
}
|
||||
|
||||
c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
|
||||
c->prog_data.urb_read_length = nr_interp_regs * 2;
|
||||
c->prog_data.curb_read_length = c->nr_creg;
|
||||
c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
|
||||
c->reg_index++;
|
||||
c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
|
||||
c->reg_index += 2;
|
||||
c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
|
||||
reg_index++;
|
||||
c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
|
||||
reg_index += 2;
|
||||
|
||||
/* mark GRF regs [0..reg_index-1] as in-use */
|
||||
for (i = 0; i < reg_index; i++)
|
||||
prealloc_grf(c, i);
|
||||
|
||||
/* An instruction may reference up to three constants.
|
||||
* They'll be found in these registers.
|
||||
|
|
@ -256,7 +380,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
|
|||
if (c->fp->use_const_buffer) {
|
||||
for (i = 0; i < 3; i++) {
|
||||
c->current_const[i].index = -1;
|
||||
c->current_const[i].reg = alloc_tmp(c);
|
||||
c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
|
|
@ -2595,7 +2719,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
|
|||
struct brw_compile *p = &c->func;
|
||||
struct brw_indirect stack_index = brw_indirect(0, 0);
|
||||
|
||||
c->reg_index = 0;
|
||||
prealloc_reg(c);
|
||||
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||
brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
|
||||
|
|
@ -2603,6 +2726,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
|
|||
for (i = 0; i < c->nr_fp_insns; i++) {
|
||||
const struct prog_instruction *inst = &c->prog_instructions[i];
|
||||
|
||||
c->cur_inst = i;
|
||||
|
||||
#if 0
|
||||
_mesa_printf("Inst %d: ", i);
|
||||
_mesa_print_instruction(inst);
|
||||
|
|
@ -2833,17 +2958,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
|
|||
_mesa_printf("unsupported IR in fragment shader %d\n",
|
||||
inst->Opcode);
|
||||
}
|
||||
|
||||
if (inst->CondUpdate)
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
|
||||
else
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
}
|
||||
post_wm_emit(c);
|
||||
|
||||
if (c->reg_index >= BRW_WM_MAX_GRF) {
|
||||
_mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()");
|
||||
/* XXX we need to do some proper error recovery here */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -2867,6 +2988,6 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
|
|||
brw_wm_print_program(c, "brw_wm_glsl_emit done");
|
||||
}
|
||||
|
||||
c->prog_data.total_grf = c->reg_index;
|
||||
c->prog_data.total_grf = num_grf_used(c);
|
||||
c->prog_data.total_scratch = 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -268,7 +268,7 @@ brw_create_texture_surface( struct brw_context *brw,
|
|||
surf.ss0.cube_neg_z = 1;
|
||||
}
|
||||
|
||||
bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
|
||||
bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
|
||||
key, sizeof(*key),
|
||||
&key->bo, key->bo ? 1 : 0,
|
||||
&surf, sizeof(surf),
|
||||
|
|
@ -321,10 +321,11 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
|
|||
key.tiling = intelObj->mt->region->tiling;
|
||||
|
||||
dri_bo_unreference(brw->wm.surf_bo[surf]);
|
||||
brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
|
||||
&key, sizeof(key),
|
||||
&key.bo, key.bo ? 1 : 0,
|
||||
NULL);
|
||||
brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
|
||||
BRW_SS_SURFACE,
|
||||
&key, sizeof(key),
|
||||
&key.bo, key.bo ? 1 : 0,
|
||||
NULL);
|
||||
if (brw->wm.surf_bo[surf] == NULL) {
|
||||
brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key);
|
||||
}
|
||||
|
|
@ -362,7 +363,7 @@ brw_create_constant_surface( struct brw_context *brw,
|
|||
surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
|
||||
brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
|
||||
|
||||
bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
|
||||
bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
|
||||
key, sizeof(*key),
|
||||
&key->bo, key->bo ? 1 : 0,
|
||||
&surf, sizeof(surf),
|
||||
|
|
@ -427,7 +428,8 @@ brw_update_wm_constant_surface( GLcontext *ctx,
|
|||
*/
|
||||
|
||||
dri_bo_unreference(brw->wm.surf_bo[surf]);
|
||||
brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
|
||||
brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
|
||||
BRW_SS_SURFACE,
|
||||
&key, sizeof(key),
|
||||
&key.bo, key.bo ? 1 : 0,
|
||||
NULL);
|
||||
|
|
@ -456,17 +458,14 @@ brw_update_vs_constant_surface( GLcontext *ctx,
|
|||
|
||||
assert(surf == 0);
|
||||
|
||||
/* free old const buffer if too small */
|
||||
if (const_buffer && const_buffer->size < size) {
|
||||
dri_bo_unreference(const_buffer);
|
||||
const_buffer = NULL;
|
||||
}
|
||||
/* We always create a new VS constant buffer so that several can be
|
||||
* in flight at a time. Free the old one first...
|
||||
*/
|
||||
dri_bo_unreference(const_buffer);
|
||||
|
||||
/* alloc new buffer if needed */
|
||||
if (!const_buffer) {
|
||||
const_buffer =
|
||||
drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64);
|
||||
}
|
||||
/* alloc new buffer */
|
||||
const_buffer =
|
||||
drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64);
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
|
||||
|
|
@ -487,7 +486,8 @@ brw_update_vs_constant_surface( GLcontext *ctx,
|
|||
*/
|
||||
|
||||
dri_bo_unreference(brw->vs.surf_bo[surf]);
|
||||
brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
|
||||
brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
|
||||
BRW_SS_SURFACE,
|
||||
&key, sizeof(key),
|
||||
&key.bo, key.bo ? 1 : 0,
|
||||
NULL);
|
||||
|
|
@ -569,10 +569,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
|
|||
dri_bo_unreference(brw->wm.surf_bo[unit]);
|
||||
brw->wm.surf_bo[unit] = NULL;
|
||||
if (cached)
|
||||
brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
|
||||
&key, sizeof(key),
|
||||
&region_bo, 1,
|
||||
NULL);
|
||||
brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
|
||||
BRW_SS_SURFACE,
|
||||
&key, sizeof(key),
|
||||
&region_bo, 1,
|
||||
NULL);
|
||||
|
||||
if (brw->wm.surf_bo[unit] == NULL) {
|
||||
struct brw_surface_state surf;
|
||||
|
|
@ -598,7 +599,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
|
|||
surf.ss0.writedisable_alpha = !key.color_mask[3];
|
||||
|
||||
/* Key size will never match key size for textures, so we're safe. */
|
||||
brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
|
||||
brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache,
|
||||
BRW_SS_SURFACE,
|
||||
&key, sizeof(key),
|
||||
&region_bo, 1,
|
||||
&surf, sizeof(surf),
|
||||
|
|
@ -630,7 +632,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
|
|||
|
||||
assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
|
||||
|
||||
bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
|
||||
bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
|
||||
NULL, 0,
|
||||
brw->wm.surf_bo, brw->wm.nr_surfaces,
|
||||
NULL);
|
||||
|
|
@ -646,7 +648,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
|
|||
else
|
||||
data[i] = 0;
|
||||
|
||||
bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
|
||||
bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
|
||||
NULL, 0,
|
||||
brw->wm.surf_bo, brw->wm.nr_surfaces,
|
||||
data, data_size,
|
||||
|
|
@ -746,7 +748,7 @@ brw_vs_get_binding_table(struct brw_context *brw)
|
|||
|
||||
assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF);
|
||||
|
||||
bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND,
|
||||
bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
|
||||
NULL, 0,
|
||||
brw->vs.surf_bo, brw->vs.nr_surfaces,
|
||||
NULL);
|
||||
|
|
@ -762,7 +764,7 @@ brw_vs_get_binding_table(struct brw_context *brw)
|
|||
else
|
||||
data[i] = 0;
|
||||
|
||||
bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND,
|
||||
bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
|
||||
NULL, 0,
|
||||
brw->vs.surf_bo, brw->vs.nr_surfaces,
|
||||
data, data_size,
|
||||
|
|
@ -787,8 +789,7 @@ brw_vs_get_binding_table(struct brw_context *brw)
|
|||
|
||||
|
||||
/**
|
||||
* Vertex shader surfaces. Just constant buffer for now. Could add vertex
|
||||
* shader textures in the future.
|
||||
* Vertex shader surfaces (constant buffer).
|
||||
*/
|
||||
static void prepare_vs_surfaces(struct brw_context *brw )
|
||||
{
|
||||
|
|
@ -824,8 +825,12 @@ prepare_surfaces(struct brw_context *brw)
|
|||
|
||||
const struct brw_tracked_state brw_wm_surfaces = {
|
||||
.dirty = {
|
||||
.mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM,
|
||||
.brw = BRW_NEW_CONTEXT,
|
||||
.mesa = (_NEW_COLOR |
|
||||
_NEW_TEXTURE |
|
||||
_NEW_BUFFERS |
|
||||
_NEW_PROGRAM |
|
||||
_NEW_PROGRAM_CONSTANTS),
|
||||
.brw = (BRW_NEW_CONTEXT),
|
||||
.cache = 0
|
||||
},
|
||||
.prepare = prepare_surfaces,
|
||||
|
|
|
|||
|
|
@ -2484,7 +2484,7 @@ void r200ValidateState( GLcontext *ctx )
|
|||
r200UpdateDrawBuffer(ctx);
|
||||
}
|
||||
|
||||
if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) {
|
||||
if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) {
|
||||
r200UpdateTextureState( ctx );
|
||||
new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */
|
||||
r200UpdateLocalViewer( ctx );
|
||||
|
|
@ -2523,6 +2523,7 @@ void r200ValidateState( GLcontext *ctx )
|
|||
}
|
||||
|
||||
if (new_state & (_NEW_PROGRAM|
|
||||
_NEW_PROGRAM_CONSTANTS |
|
||||
/* need to test for pretty much anything due to possible parameter bindings */
|
||||
_NEW_MODELVIEW|_NEW_PROJECTION|_NEW_TRANSFORM|
|
||||
_NEW_LIGHT|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX|
|
||||
|
|
|
|||
|
|
@ -470,7 +470,8 @@ void r300TranslateFragmentShader(r300ContextPtr r300,
|
|||
fp->translated = GL_TRUE;
|
||||
if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
|
||||
r300FragmentProgramDump(fp, &fp->code);
|
||||
r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
|
||||
r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM |
|
||||
_NEW_PROGRAM_CONSTANTS);
|
||||
}
|
||||
|
||||
update_params(r300, fp);
|
||||
|
|
|
|||
|
|
@ -1109,7 +1109,7 @@ void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
|
|||
struct gl_program_parameter_list *paramList;
|
||||
GLuint i;
|
||||
|
||||
if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM)))
|
||||
if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)))
|
||||
return;
|
||||
|
||||
fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current;
|
||||
|
|
@ -2357,11 +2357,12 @@ void r300UpdateShaders(r300ContextPtr rmesa)
|
|||
hw_tcl_on = future_hw_tcl_on = 0;
|
||||
r300ResetHwState(rmesa);
|
||||
|
||||
r300UpdateStateParameters(ctx, _NEW_PROGRAM);
|
||||
r300UpdateStateParameters(ctx, _NEW_PROGRAM |
|
||||
_NEW_PROGRAM_CONSTANTS);
|
||||
return;
|
||||
}
|
||||
}
|
||||
r300UpdateStateParameters(ctx, _NEW_PROGRAM);
|
||||
r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
|
||||
}
|
||||
|
||||
static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx,
|
||||
|
|
|
|||
|
|
@ -501,7 +501,8 @@ void r500TranslateFragmentShader(r300ContextPtr r300,
|
|||
|
||||
_mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0);
|
||||
|
||||
r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
|
||||
r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM |
|
||||
_NEW_PROGRAM_CONSTANTS);
|
||||
|
||||
if (RADEON_DEBUG & DEBUG_PIXEL) {
|
||||
if (fp->translated) {
|
||||
|
|
|
|||
|
|
@ -74,8 +74,6 @@ _mesa_BindProgram(GLenum target, GLuint id)
|
|||
GET_CURRENT_CONTEXT(ctx);
|
||||
ASSERT_OUTSIDE_BEGIN_END(ctx);
|
||||
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM);
|
||||
|
||||
/* Error-check target and get curProg */
|
||||
if ((target == GL_VERTEX_PROGRAM_ARB) && /* == GL_VERTEX_PROGRAM_NV */
|
||||
(ctx->Extensions.NV_vertex_program ||
|
||||
|
|
@ -132,6 +130,9 @@ _mesa_BindProgram(GLenum target, GLuint id)
|
|||
return;
|
||||
}
|
||||
|
||||
/* signal new program (and its new constants) */
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
|
||||
|
||||
/* bind newProg */
|
||||
if (target == GL_VERTEX_PROGRAM_ARB) { /* == GL_VERTEX_PROGRAM_NV */
|
||||
_mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current,
|
||||
|
|
@ -489,7 +490,7 @@ _mesa_ProgramEnvParameter4fARB(GLenum target, GLuint index,
|
|||
GET_CURRENT_CONTEXT(ctx);
|
||||
ASSERT_OUTSIDE_BEGIN_END(ctx);
|
||||
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
|
||||
|
||||
if (target == GL_FRAGMENT_PROGRAM_ARB
|
||||
&& ctx->Extensions.ARB_fragment_program) {
|
||||
|
|
@ -537,7 +538,7 @@ _mesa_ProgramEnvParameters4fvEXT(GLenum target, GLuint index, GLsizei count,
|
|||
GLfloat * dest;
|
||||
ASSERT_OUTSIDE_BEGIN_END(ctx);
|
||||
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
|
||||
|
||||
if (count <= 0) {
|
||||
_mesa_error(ctx, GL_INVALID_VALUE, "glProgramEnvParameters4fv(count)");
|
||||
|
|
@ -631,7 +632,7 @@ _mesa_ProgramLocalParameter4fARB(GLenum target, GLuint index,
|
|||
struct gl_program *prog;
|
||||
ASSERT_OUTSIDE_BEGIN_END(ctx);
|
||||
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
|
||||
|
||||
if ((target == GL_FRAGMENT_PROGRAM_NV
|
||||
&& ctx->Extensions.NV_fragment_program) ||
|
||||
|
|
@ -685,7 +686,7 @@ _mesa_ProgramLocalParameters4fvEXT(GLenum target, GLuint index, GLsizei count,
|
|||
GLint i;
|
||||
ASSERT_OUTSIDE_BEGIN_END(ctx);
|
||||
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
|
||||
|
||||
if (count <= 0) {
|
||||
_mesa_error(ctx, GL_INVALID_VALUE, "glProgramLocalParameters4fv(count)");
|
||||
|
|
|
|||
|
|
@ -706,7 +706,7 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name,
|
|||
GET_CURRENT_CONTEXT(ctx);
|
||||
ASSERT_OUTSIDE_BEGIN_END(ctx);
|
||||
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
|
||||
|
||||
prog = _mesa_lookup_program(ctx, id);
|
||||
if (!prog || prog->Target != GL_FRAGMENT_PROGRAM_NV) {
|
||||
|
|
|
|||
|
|
@ -547,15 +547,13 @@ update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic)
|
|||
|
||||
|
||||
/**
|
||||
* Find the live intervals for each temporary register in the program.
|
||||
* For register R, the interval [A,B] indicates that R is referenced
|
||||
* from instruction A through instruction B.
|
||||
* Special consideration is needed for loops and subroutines.
|
||||
* \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
|
||||
* Find first/last instruction that references each temporary register.
|
||||
*/
|
||||
static GLboolean
|
||||
find_live_intervals(struct gl_program *prog,
|
||||
struct interval_list *liveIntervals)
|
||||
GLboolean
|
||||
_mesa_find_temp_intervals(const struct prog_instruction *instructions,
|
||||
GLuint numInstructions,
|
||||
GLint intBegin[MAX_PROGRAM_TEMPS],
|
||||
GLint intEnd[MAX_PROGRAM_TEMPS])
|
||||
{
|
||||
struct loop_info
|
||||
{
|
||||
|
|
@ -563,26 +561,15 @@ find_live_intervals(struct gl_program *prog,
|
|||
};
|
||||
struct loop_info loopStack[MAX_LOOP_NESTING];
|
||||
GLuint loopStackDepth = 0;
|
||||
GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS];
|
||||
GLuint i;
|
||||
|
||||
/*
|
||||
* Note: we'll return GL_FALSE below if we find relative indexing
|
||||
* into the TEMP register file. We can't handle that yet.
|
||||
* We also give up on subroutines for now.
|
||||
*/
|
||||
|
||||
if (dbg) {
|
||||
_mesa_printf("Optimize: Begin find intervals\n");
|
||||
}
|
||||
|
||||
for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
|
||||
intBegin[i] = intEnd[i] = -1;
|
||||
}
|
||||
|
||||
/* Scan instructions looking for temporary registers */
|
||||
for (i = 0; i < prog->NumInstructions; i++) {
|
||||
const struct prog_instruction *inst = prog->Instructions + i;
|
||||
for (i = 0; i < numInstructions; i++) {
|
||||
const struct prog_instruction *inst = instructions + i;
|
||||
if (inst->Opcode == OPCODE_BGNLOOP) {
|
||||
loopStack[loopStackDepth].Start = i;
|
||||
loopStack[loopStackDepth].End = inst->BranchTarget;
|
||||
|
|
@ -595,7 +582,7 @@ find_live_intervals(struct gl_program *prog,
|
|||
return GL_FALSE;
|
||||
}
|
||||
else {
|
||||
const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
|
||||
const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
|
||||
GLuint j;
|
||||
for (j = 0; j < numSrc; j++) {
|
||||
if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
|
||||
|
|
@ -624,6 +611,39 @@ find_live_intervals(struct gl_program *prog,
|
|||
}
|
||||
}
|
||||
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Find the live intervals for each temporary register in the program.
|
||||
* For register R, the interval [A,B] indicates that R is referenced
|
||||
* from instruction A through instruction B.
|
||||
* Special consideration is needed for loops and subroutines.
|
||||
* \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
|
||||
*/
|
||||
static GLboolean
|
||||
find_live_intervals(struct gl_program *prog,
|
||||
struct interval_list *liveIntervals)
|
||||
{
|
||||
GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS];
|
||||
GLuint i;
|
||||
|
||||
/*
|
||||
* Note: we'll return GL_FALSE below if we find relative indexing
|
||||
* into the TEMP register file. We can't handle that yet.
|
||||
* We also give up on subroutines for now.
|
||||
*/
|
||||
|
||||
if (dbg) {
|
||||
_mesa_printf("Optimize: Begin find intervals\n");
|
||||
}
|
||||
|
||||
/* build intermediate arrays */
|
||||
if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
|
||||
intBegin, intEnd))
|
||||
return GL_FALSE;
|
||||
|
||||
/* Build live intervals list from intermediate arrays */
|
||||
liveIntervals->Num = 0;
|
||||
for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
|
||||
|
|
@ -794,6 +814,96 @@ _mesa_reallocate_registers(struct gl_program *prog)
|
|||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
static void
|
||||
_mesa_find_temporary_live_intervals(struct gl_program *prog,
|
||||
GLint firstInst[MAX_PROGRAM_TEMPS],
|
||||
GLint lastInst[MAX_PROGRAM_TEMPS])
|
||||
{
|
||||
GLuint i;
|
||||
|
||||
for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
|
||||
firstInst[i] = lastInst[i] = -1;
|
||||
}
|
||||
|
||||
struct loop_info loopStack[MAX_LOOP_NESTING];
|
||||
GLuint loopStackDepth = 0;
|
||||
GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS];
|
||||
GLuint i;
|
||||
|
||||
/*
|
||||
* Note: we'll return GL_FALSE below if we find relative indexing
|
||||
* into the TEMP register file. We can't handle that yet.
|
||||
* We also give up on subroutines for now.
|
||||
*/
|
||||
|
||||
if (dbg) {
|
||||
_mesa_printf("Optimize: Begin find intervals\n");
|
||||
}
|
||||
|
||||
for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
|
||||
intBegin[i] = intEnd[i] = -1;
|
||||
}
|
||||
|
||||
/* Scan instructions looking for temporary registers */
|
||||
for (i = 0; i < prog->NumInstructions; i++) {
|
||||
const struct prog_instruction *inst = prog->Instructions + i;
|
||||
if (inst->Opcode == OPCODE_BGNLOOP) {
|
||||
loopStack[loopStackDepth].Start = i;
|
||||
loopStack[loopStackDepth].End = inst->BranchTarget;
|
||||
loopStackDepth++;
|
||||
}
|
||||
else if (inst->Opcode == OPCODE_ENDLOOP) {
|
||||
loopStackDepth--;
|
||||
}
|
||||
else if (inst->Opcode == OPCODE_CAL) {
|
||||
return GL_FALSE;
|
||||
}
|
||||
else {
|
||||
const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
|
||||
GLuint j;
|
||||
for (j = 0; j < numSrc; j++) {
|
||||
if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
|
||||
const GLuint index = inst->SrcReg[j].Index;
|
||||
if (inst->SrcReg[j].RelAddr)
|
||||
return GL_FALSE;
|
||||
update_interval(intBegin, intEnd, index, i);
|
||||
if (loopStackDepth > 0) {
|
||||
/* extend temp register's interval to end of loop */
|
||||
GLuint loopEnd = loopStack[loopStackDepth - 1].End;
|
||||
update_interval(intBegin, intEnd, index, loopEnd);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (inst->DstReg.File == PROGRAM_TEMPORARY) {
|
||||
const GLuint index = inst->DstReg.Index;
|
||||
if (inst->DstReg.RelAddr)
|
||||
return GL_FALSE;
|
||||
update_interval(intBegin, intEnd, index, i);
|
||||
if (loopStackDepth > 0) {
|
||||
/* extend temp register's interval to end of loop */
|
||||
GLuint loopEnd = loopStack[loopStackDepth - 1].End;
|
||||
update_interval(intBegin, intEnd, index, loopEnd);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Apply optimizations to the given program to eliminate unnecessary
|
||||
* instructions, temp regs, etc.
|
||||
|
|
|
|||
|
|
@ -25,7 +25,19 @@
|
|||
#ifndef PROG_OPT_H
|
||||
#define PROG_OPT_H
|
||||
|
||||
|
||||
#include "main/config.h"
|
||||
|
||||
|
||||
struct gl_program;
|
||||
struct prog_instruction;
|
||||
|
||||
|
||||
extern GLboolean
|
||||
_mesa_find_temp_intervals(const struct prog_instruction *instructions,
|
||||
GLuint numInstructions,
|
||||
GLint intBegin[MAX_PROGRAM_TEMPS],
|
||||
GLint intEnd[MAX_PROGRAM_TEMPS]);
|
||||
|
||||
extern void
|
||||
_mesa_optimize_program(GLcontext *ctx, struct gl_program *program);
|
||||
|
|
|
|||
|
|
@ -1487,7 +1487,7 @@ _mesa_use_program(GLcontext *ctx, GLuint program)
|
|||
return;
|
||||
}
|
||||
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM);
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
|
||||
|
||||
if (program) {
|
||||
shProg = _mesa_lookup_shader_program_err(ctx, program, "glUseProgram");
|
||||
|
|
@ -1789,7 +1789,7 @@ _mesa_uniform(GLcontext *ctx, GLint location, GLsizei count,
|
|||
return;
|
||||
}
|
||||
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
|
||||
|
||||
uniform = &shProg->Uniforms->Uniforms[location];
|
||||
|
||||
|
|
@ -1929,7 +1929,7 @@ _mesa_uniform_matrix(GLcontext *ctx, GLint cols, GLint rows,
|
|||
return;
|
||||
}
|
||||
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
|
||||
FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS);
|
||||
|
||||
uniform = &shProg->Uniforms->Uniforms[location];
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue