mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 22:38:05 +02:00
i965: Drop the memcmp for finding duplicated CURBE uploads.
At this point, the extra copy of the data and memcmp are as expensive as just re-uploading.

Note: now that we'll always upload, and brw_constant_buffer watches BRW_NEW_BATCH anyway, we don't need to explicitly unref the old curbe_bo at batch reset time.

No significant performance difference on glamor copywinwin10 (n=55), despite that test having a 98% hit rate on the cache.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
44c63bdd40
commit
ccda1b9ba9
4 changed files with 2 additions and 50 deletions
|
|
@ -757,11 +757,6 @@ brwCreateContext(gl_api api,
|
|||
brw->prim_restart.enable_cut_index = false;
|
||||
brw->gs.enabled = false;
|
||||
|
||||
if (brw->gen < 6) {
|
||||
brw->curbe.last_buf = calloc(1, 4096);
|
||||
brw->curbe.next_buf = calloc(1, 4096);
|
||||
}
|
||||
|
||||
ctx->VertexProgram._MaintainTnlProgram = true;
|
||||
ctx->FragmentProgram._MaintainTexEnvProgram = true;
|
||||
|
||||
|
|
@ -825,9 +820,6 @@ intelDestroyContext(__DRIcontext * driContextPriv)
|
|||
|
||||
drm_intel_bo_unreference(brw->curbe.curbe_bo);
|
||||
|
||||
free(brw->curbe.last_buf);
|
||||
free(brw->curbe.next_buf);
|
||||
|
||||
drm_intel_gem_context_destroy(brw->hw_ctx);
|
||||
|
||||
if (ctx->swrast_context) {
|
||||
|
|
|
|||
|
|
@ -1234,20 +1234,6 @@ struct brw_context
|
|||
drm_intel_bo *curbe_bo;
|
||||
/** Offset within curbe_bo of space for current curbe entry */
|
||||
GLuint curbe_offset;
|
||||
|
||||
/**
|
||||
* Copy of the last set of CURBEs uploaded. Frequently we'll end up
|
||||
* in brw_curbe.c with the same set of constant data to be uploaded,
|
||||
* so we'd rather not upload new constants in that case (it can cause
|
||||
* a pipeline bubble since only up to 4 can be pipelined at a time).
|
||||
*/
|
||||
GLfloat *last_buf;
|
||||
/**
|
||||
* Allocation for where to calculate the next set of CURBEs.
|
||||
* It's a hot enough path that malloc/free of that data matters.
|
||||
*/
|
||||
GLfloat *next_buf;
|
||||
GLuint last_bufsz;
|
||||
} curbe;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -188,11 +188,11 @@ brw_upload_constant_buffer(struct brw_context *brw)
|
|||
gl_clip_plane *clip_planes;
|
||||
|
||||
if (sz == 0) {
|
||||
brw->curbe.last_bufsz = 0;
|
||||
goto emit;
|
||||
}
|
||||
|
||||
buf = brw->curbe.next_buf;
|
||||
buf = intel_upload_space(brw, bufsz, 64,
|
||||
&brw->curbe.curbe_bo, &brw->curbe.curbe_offset);
|
||||
|
||||
/* fragment shader constants */
|
||||
if (brw->curbe.wm_size) {
|
||||
|
|
@ -246,27 +246,6 @@ brw_upload_constant_buffer(struct brw_context *brw)
|
|||
for (i = 0; i < sz*16; i+=4)
|
||||
fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
|
||||
buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
|
||||
|
||||
fprintf(stderr, "last_buf %p buf %p sz %d/%d cmp %d\n",
|
||||
brw->curbe.last_buf, buf,
|
||||
bufsz, brw->curbe.last_bufsz,
|
||||
brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
|
||||
}
|
||||
|
||||
if (brw->curbe.curbe_bo != NULL &&
|
||||
bufsz == brw->curbe.last_bufsz &&
|
||||
memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
|
||||
/* constants have not changed */
|
||||
} else {
|
||||
/* Update the record of what our last set of constants was. We
|
||||
* don't just flip the pointers because we don't fill in the
|
||||
* data in the padding between the entries.
|
||||
*/
|
||||
memcpy(brw->curbe.last_buf, buf, bufsz);
|
||||
brw->curbe.last_bufsz = bufsz;
|
||||
|
||||
intel_upload_data(brw, buf, bufsz, 64,
|
||||
&brw->curbe.curbe_bo, &brw->curbe.curbe_offset);
|
||||
}
|
||||
|
||||
/* Because this provokes an action (ie copy the constants into the
|
||||
|
|
|
|||
|
|
@ -224,11 +224,6 @@ brw_finish_batch(struct brw_context *brw)
|
|||
if (brw->batch.ring == RENDER_RING)
|
||||
brw_perf_monitor_finish_batch(brw);
|
||||
|
||||
if (brw->curbe.curbe_bo) {
|
||||
drm_intel_bo_unreference(brw->curbe.curbe_bo);
|
||||
brw->curbe.curbe_bo = NULL;
|
||||
}
|
||||
|
||||
/* Mark that the current program cache BO has been used by the GPU.
|
||||
* It will be reallocated if we need to put new programs in for the
|
||||
* next batch.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue