i965: Drop the memcmp for finding duplicated CURBE uploads.

At this point, the extra copy of the data and memcmp are as expensive as
just re-uploading.
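
For illustration only (not part of this commit), the tradeoff boils down to the
following sketch; upload_space()/upload_data() are hypothetical stand-ins for
the driver's intel_upload_* helpers, not their real signatures:

    #include <stddef.h>
    #include <string.h>

    /* Hypothetical stand-ins for the driver's upload helpers. */
    float *upload_space(size_t size);                   /* space the caller fills in place */
    void   upload_data(const float *buf, size_t size);  /* copies an existing buffer       */

    /* Old pattern (removed here): build the constants in a scratch buffer,
     * memcmp against the previous upload, and only upload on a mismatch. */
    static void upload_cached(float *next_buf, float *last_buf, size_t *last_size,
                              size_t size, void (*fill)(float *))
    {
       fill(next_buf);                       /* extra copy of the constant data */
       if (size == *last_size && memcmp(next_buf, last_buf, size) == 0)
          return;                            /* cache hit: skip the upload      */
       memcpy(last_buf, next_buf, size);
       *last_size = size;
       upload_data(next_buf, size);
    }

    /* New pattern: always write straight into upload space; the copy plus the
     * memcmp above cost about as much as simply re-uploading. */
    static void upload_always(size_t size, void (*fill)(float *))
    {
       fill(upload_space(size));
    }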

Note: now that we'll always upload, and brw_constant_buffer watches
BRW_NEW_BATCH anyway, we don't need to explicitly unref the old curbe_bo
at batch reset time.
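
For reference, the tracked-state convention this relies on looks roughly like
the sketch below; the real brw_constant_buffer atom in brw_curbe.c lists more
dirty bits than shown here:

    /* Sketch of the i965 tracked-state atom convention.  Because .brw
     * includes BRW_NEW_BATCH, brw_upload_constant_buffer re-runs after every
     * batchbuffer flush, so a curbe_bo reference left over from the previous
     * batch gets refreshed without brw_finish_batch unreferencing it.
     * Other mesa/brw/cache dirty bits omitted.
     */
    const struct brw_tracked_state brw_constant_buffer = {
       .dirty = {
          .brw = BRW_NEW_BATCH,
       },
       .emit = brw_upload_constant_buffer,
    };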

No significant performance difference on glamor copywinwin10 (n=55),
despite that test having a 98% hit rate on the cache.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Eric Anholt 2014-05-20 11:54:26 -07:00 committed by Kenneth Graunke
parent 44c63bdd40
commit ccda1b9ba9
4 changed files with 2 additions and 50 deletions


@@ -757,11 +757,6 @@ brwCreateContext(gl_api api,
    brw->prim_restart.enable_cut_index = false;
    brw->gs.enabled = false;

-   if (brw->gen < 6) {
-      brw->curbe.last_buf = calloc(1, 4096);
-      brw->curbe.next_buf = calloc(1, 4096);
-   }

    ctx->VertexProgram._MaintainTnlProgram = true;
    ctx->FragmentProgram._MaintainTexEnvProgram = true;
@@ -825,9 +820,6 @@ intelDestroyContext(__DRIcontext * driContextPriv)
    drm_intel_bo_unreference(brw->curbe.curbe_bo);
-   free(brw->curbe.last_buf);
-   free(brw->curbe.next_buf);
    drm_intel_gem_context_destroy(brw->hw_ctx);

    if (ctx->swrast_context) {


@@ -1234,20 +1234,6 @@ struct brw_context
       drm_intel_bo *curbe_bo;
       /** Offset within curbe_bo of space for current curbe entry */
       GLuint curbe_offset;
-      /**
-       * Copy of the last set of CURBEs uploaded.  Frequently we'll end up
-       * in brw_curbe.c with the same set of constant data to be uploaded,
-       * so we'd rather not upload new constants in that case (it can cause
-       * a pipeline bubble since only up to 4 can be pipelined at a time).
-       */
-      GLfloat *last_buf;
-      /**
-       * Allocation for where to calculate the next set of CURBEs.
-       * It's a hot enough path that malloc/free of that data matters.
-       */
-      GLfloat *next_buf;
-      GLuint last_bufsz;
    } curbe;

    /**


@@ -188,11 +188,11 @@ brw_upload_constant_buffer(struct brw_context *brw)
    gl_clip_plane *clip_planes;

    if (sz == 0) {
-      brw->curbe.last_bufsz = 0;
       goto emit;
    }

-   buf = brw->curbe.next_buf;
+   buf = intel_upload_space(brw, bufsz, 64,
+                            &brw->curbe.curbe_bo, &brw->curbe.curbe_offset);

    /* fragment shader constants */
    if (brw->curbe.wm_size) {
@@ -246,27 +246,6 @@ brw_upload_constant_buffer(struct brw_context *brw)
       for (i = 0; i < sz*16; i+=4)
          fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
                  buf[i+0], buf[i+1], buf[i+2], buf[i+3]);

-      fprintf(stderr, "last_buf %p buf %p sz %d/%d cmp %d\n",
-              brw->curbe.last_buf, buf,
-              bufsz, brw->curbe.last_bufsz,
-              brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
    }

-   if (brw->curbe.curbe_bo != NULL &&
-       bufsz == brw->curbe.last_bufsz &&
-       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
-      /* constants have not changed */
-   } else {
-      /* Update the record of what our last set of constants was.  We
-       * don't just flip the pointers because we don't fill in the
-       * data in the padding between the entries.
-       */
-      memcpy(brw->curbe.last_buf, buf, bufsz);
-      brw->curbe.last_bufsz = bufsz;
-      intel_upload_data(brw, buf, bufsz, 64,
-                        &brw->curbe.curbe_bo, &brw->curbe.curbe_offset);
-   }

    /* Because this provokes an action (ie copy the constants into the


@@ -224,11 +224,6 @@ brw_finish_batch(struct brw_context *brw)
    if (brw->batch.ring == RENDER_RING)
       brw_perf_monitor_finish_batch(brw);

-   if (brw->curbe.curbe_bo) {
-      drm_intel_bo_unreference(brw->curbe.curbe_bo);
-      brw->curbe.curbe_bo = NULL;
-   }

    /* Mark that the current program cache BO has been used by the GPU.
     * It will be reallocated if we need to put new programs in for the
     * next batch.