[965] Convert WM unit to use a cache key instead of brw_cache_data.

This commit is contained in:
Eric Anholt 2008-01-02 15:47:47 -08:00
parent 03b59edbb5
commit 9e9f6f105c
2 changed files with 128 additions and 82 deletions

View file

@ -590,7 +590,6 @@ struct brw_context
GLuint max_threads;
dri_bo *scratch_buffer;
GLuint scratch_buffer_size;
GLuint sampler_count;
dri_bo *sampler_bo;

View file

@ -41,77 +41,96 @@
* WM unit - fragment programs and rasterization
*/
static void upload_wm_unit(struct brw_context *brw )
/**
 * Key describing one WM unit state object.
 *
 * The whole struct is handed to brw_search_cache()/brw_upload_cache()
 * together with its size, and wm_unit_create_from_key() reads its fields to
 * fill in struct brw_wm_unit_state.  Field order is kept stable so the
 * in-memory layout of the key does not change.
 */
struct brw_wm_unit_key {
   /* Program resource usage. */
   unsigned int total_grf;              /* rounded up to 16s when consumed */
   unsigned int total_scratch;          /* per-thread scratch bytes */

   /* URB / CURBE read configuration for thread dispatch. */
   unsigned int urb_entry_read_length;
   unsigned int curb_entry_read_length;
   unsigned int dispatch_grf_start_reg;
   unsigned int curbe_offset;           /* consumed as curbe_offset * 2 */
   unsigned int urb_size;

   /* Number of threads; wm5.max_threads is programmed as this minus one. */
   unsigned int max_threads;

   /* Binding table entry count and sampler count. */
   unsigned int nr_surfaces;
   unsigned int sampler_count;

   /* Fragment program properties. */
   GLboolean uses_depth;
   GLboolean computes_depth;
   GLboolean uses_kill;
   GLboolean is_glsl;                   /* selects 8-pixel vs 16-pixel dispatch */

   /* Miscellaneous enables. */
   GLboolean polygon_stipple;
   GLboolean stats_wm;
};
static void
wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
{
const struct gl_fragment_program *fp = brw->fragment_program;
struct intel_context *intel = &brw->intel;
struct brw_wm_unit_state wm;
GLuint max_threads;
GLuint per_thread;
dri_bo *reloc_bufs[3];
if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
max_threads = 0;
key->max_threads = 1;
else
max_threads = 31;
key->max_threads = 32;
/* CACHE_NEW_WM_PROG */
key->total_grf = brw->wm.prog_data->total_grf;
key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024);
/* BRW_NEW_URB_FENCE */
key->urb_size = brw->urb.vsize;
/* CACHE_NEW_SURFACE */
key->nr_surfaces = brw->wm.nr_surfaces;
/* CACHE_NEW_SAMPLER */
key->sampler_count = brw->wm.sampler_count;
/* _NEW_POLYGONSTIPPLE */
key->polygon_stipple = brw->attribs.Polygon->StippleFlag;
/* BRW_NEW_FRAGMENT_PROGRAM */
key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
/* as far as we can tell */
key->computes_depth =
(fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) != 0;
/* _NEW_COLOR */
key->uses_kill = fp->UsesKill || brw->attribs.Color->AlphaEnabled;
key->is_glsl = brw_wm_is_glsl(fp);
/* XXX: This needs a flag to indicate when it changes. */
key->stats_wm = intel->stats_wm;
}
/**
 * Build a struct brw_wm_unit_state from the given key and upload it to the
 * state cache as a BRW_WM_UNIT entry.
 *
 * The local state struct is zeroed, filled in field by field, and then
 * passed to brw_upload_cache() together with the key and reloc_bufs (count
 * of 3); the buffer object returned by brw_upload_cache() is returned to
 * the caller.
 *
 * NOTE(review): as shown here the body interleaves pre-change and
 * post-change statements of the commit, and unchanged lines are elided at
 * each "@ -a,b +c,d @" hunk marker, so this text is not a complete function
 * body on its own.
 */
static dri_bo *
wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
dri_bo **reloc_bufs)
{
struct brw_wm_unit_state wm;
memset(&wm, 0, sizeof(wm));
/* CACHE_NEW_WM_PROG */
wm.thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1;
wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length;
wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length;
wm.thread1.depth_coef_urb_read_offset = 1;
wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
wm.thread1.binding_table_entry_count = key->nr_surfaces;
wm.wm5.max_threads = max_threads;
per_thread = ALIGN(brw->wm.prog_data->total_scratch, 1024);
assert(per_thread <= 12 * 1024);
if (brw->wm.prog_data->total_scratch) {
GLuint total = per_thread * (max_threads + 1);
/* Scratch space -- just have to make sure there is sufficient
* allocated for the active program and current number of threads.
*/
brw->wm.scratch_buffer_size = total;
if (brw->wm.scratch_buffer &&
brw->wm.scratch_buffer_size > brw->wm.scratch_buffer->size) {
dri_bo_unreference(brw->wm.scratch_buffer);
brw->wm.scratch_buffer = NULL;
}
if (!brw->wm.scratch_buffer) {
brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr,
"wm scratch",
brw->wm.scratch_buffer_size,
4096, DRM_BO_FLAG_MEM_TT);
}
}
/* CACHE_NEW_SURFACE */
wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces;
/* CACHE_NEW_WM_PROG */
if (per_thread != 0) {
/* reloc */
if (key->total_scratch != 0) {
wm.thread2.scratch_space_base_pointer =
brw->wm.scratch_buffer->offset >> 10;
wm.thread2.per_thread_scratch_space = per_thread / 1024 - 1;
brw->wm.scratch_buffer->offset >> 10; /* reloc */
wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
} else {
wm.thread2.scratch_space_base_pointer = 0;
wm.thread2.per_thread_scratch_space = 0;
}
/* BRW_NEW_CURBE_OFFSETS */
wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;
wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg;
wm.thread3.urb_entry_read_length = key->urb_entry_read_length;
wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
wm.thread3.urb_entry_read_offset = 0;
wm.thread1.depth_coef_urb_read_offset = 1;
wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
/* CACHE_NEW_SAMPLER */
wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4;
wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
if (brw->wm.sampler_bo != NULL) {
/* reloc */
wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5;
@ -119,27 +138,16 @@ static void upload_wm_unit(struct brw_context *brw )
wm.wm4.sampler_state_pointer = 0;
}
/* BRW_NEW_FRAGMENT_PROGRAM */
{
const struct gl_fragment_program *fp = brw->fragment_program;
wm.wm5.program_uses_depth = key->uses_depth;
wm.wm5.program_computes_depth = key->computes_depth;
wm.wm5.program_uses_killpixel = key->uses_kill;
if (fp->Base.InputsRead & (1<<FRAG_ATTRIB_WPOS))
wm.wm5.program_uses_depth = 1; /* as far as we can tell */
if (fp->Base.OutputsWritten & (1<<FRAG_RESULT_DEPR))
wm.wm5.program_computes_depth = 1;
/* _NEW_COLOR */
if (fp->UsesKill ||
brw->attribs.Color->AlphaEnabled)
wm.wm5.program_uses_killpixel = 1;
if (brw_wm_is_glsl(fp))
wm.wm5.enable_8_pix = 1;
else
wm.wm5.enable_16_pix = 1;
}
/* GLSL programs get 8-pixel dispatch, everything else 16-pixel. */
if (key->is_glsl)
wm.wm5.enable_8_pix = 1;
else
wm.wm5.enable_16_pix = 1;
/* The key stores a thread count; this field is programmed as count - 1. */
wm.wm5.max_threads = key->max_threads - 1;
wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */
wm.wm5.legacy_line_rast = 0;
wm.wm5.legacy_global_depth_bias = 0;
@ -147,9 +155,7 @@ static void upload_wm_unit(struct brw_context *brw )
wm.wm5.line_aa_region_width = 0;
wm.wm5.line_endcap_aa_region_width = 1;
/* _NEW_POLYGONSTIPPLE */
if (brw->attribs.Polygon->StippleFlag)
wm.wm5.polygon_stipple = 1;
wm.wm5.polygon_stipple = key->polygon_stipple;
/* NOTE(review): the polygon-offset test below still reads context state
 * directly rather than a key field; presumably it needs to be part of the
 * key for cache hits to be correct -- confirm against the full file.
 */
/* _NEW_POLYGON */
if (brw->attribs.Polygon->OffsetFill) {
@ -171,20 +177,61 @@ static void upload_wm_unit(struct brw_context *brw )
wm.wm5.line_stipple = 1;
}
if (INTEL_DEBUG & DEBUG_STATS || intel->stats_wm)
if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm)
wm.wm4.stats_enable = 1;
reloc_bufs[0] = brw->wm.prog_bo;
reloc_bufs[1] = brw->wm.scratch_buffer;
reloc_bufs[2] = brw->wm.sampler_bo;
/* Record the low bits that share a dword with each relocated pointer. */
/* NOTE(review): presumably consumed by emit_reloc_wm_unit() -- confirm. */
brw->wm.thread0_delta = wm.thread0.grf_reg_count << 1;
brw->wm.thread2_delta = wm.thread2.per_thread_scratch_space;
brw->wm.wm4_delta = wm.wm4.stats_enable | (wm.wm4.sampler_count << 2);
return brw_upload_cache(&brw->cache, BRW_WM_UNIT,
key, sizeof(*key),
reloc_bufs, 3,
&wm, sizeof(wm),
NULL, NULL);
}
/**
 * Select the WM unit state object matching the current context state,
 * creating and caching it if no matching entry exists.
 *
 * Steps:
 *  - build the cache key with wm_unit_populate_key();
 *  - make sure brw->wm.scratch_buffer is large enough when the program
 *    uses scratch space;
 *  - drop the reference to the previous brw->wm.state_bo and look the key
 *    up with brw_search_cache(), falling back to
 *    wm_unit_create_from_key() when the search returns NULL.
 *
 * \param brw  context whose wm.state_bo (and possibly wm.scratch_buffer)
 *             is updated.
 */
static void upload_wm_unit( struct brw_context *brw )
{
   struct intel_context *intel = &brw->intel;
   struct brw_wm_unit_key key;
   dri_bo *reloc_bufs[3];

   wm_unit_populate_key(brw, &key);

   /* Allocate the necessary scratch space if we haven't already.  Don't
    * bother reducing the allocation later, since we use scratch so
    * rarely.
    */
   assert(key.total_scratch <= 12 * 1024);
   if (key.total_scratch) {
      /* total_scratch is per thread; size the buffer for every thread. */
      GLuint total = key.total_scratch * key.max_threads;

      /* An existing buffer that is too small is released and replaced. */
      if (brw->wm.scratch_buffer && total > brw->wm.scratch_buffer->size) {
         dri_bo_unreference(brw->wm.scratch_buffer);
         brw->wm.scratch_buffer = NULL;
      }
      if (brw->wm.scratch_buffer == NULL) {
         brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr,
                                               "wm scratch",
                                               total,
                                               4096, DRM_BO_FLAG_MEM_TT);
      }
   }

   /* Buffers referenced by the unit state: program, scratch, samplers. */
   reloc_bufs[0] = brw->wm.prog_bo;
   reloc_bufs[1] = brw->wm.scratch_buffer;
   reloc_bufs[2] = brw->wm.sampler_bo;

   /* Release the previous state object before replacing it.  The state is
    * now looked up by key only; there is no local unit-state struct in this
    * function to hand to the cache directly.
    */
   dri_bo_unreference(brw->wm.state_bo);
   brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT,
                                       &key, sizeof(key),
                                       reloc_bufs, 3,
                                       NULL);
   if (brw->wm.state_bo == NULL) {
      brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs);
   }
}
static void emit_reloc_wm_unit(struct brw_context *brw)