i965: Drop the memcmp for finding duplicated CURBE uploads.

At this point, the extra copy of the data and memcmp are as expensive as
just re-uploading.
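
For illustration only (not part of this commit), the tradeoff boils down to the
following sketch; upload_space()/upload_data() are hypothetical stand-ins for
the driver's intel_upload_* helpers, not their real signatures:

    #include <stddef.h>
    #include <string.h>

    /* Hypothetical stand-ins for the driver's upload helpers. */
    float *upload_space(size_t size);                   /* space the caller fills in place */
    void   upload_data(const float *buf, size_t size);  /* copies an existing buffer       */

    /* Old pattern (removed here): build the constants in a scratch buffer,
     * memcmp against the previous upload, and only upload on a mismatch. */
    static void upload_cached(float *next_buf, float *last_buf, size_t *last_size,
                              size_t size, void (*fill)(float *))
    {
       fill(next_buf);                       /* extra copy of the constant data */
       if (size == *last_size && memcmp(next_buf, last_buf, size) == 0)
          return;                            /* cache hit: skip the upload      */
       memcpy(last_buf, next_buf, size);
       *last_size = size;
       upload_data(next_buf, size);
    }

    /* New pattern: always write straight into upload space; the copy plus the
     * memcmp above cost about as much as simply re-uploading. */
    static void upload_always(size_t size, void (*fill)(float *))
    {
       fill(upload_space(size));
    }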

Note: now that we'll always upload, and brw_constant_buffer watches
BRW_NEW_BATCH anyway, we don't need to explicitly unref the old curbe_bo
at batch reset time.
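
For reference, the tracked-state convention this relies on looks roughly like
the sketch below; the real brw_constant_buffer atom in brw_curbe.c lists more
dirty bits than shown here:

    /* Sketch of the i965 tracked-state atom convention.  Because .brw
     * includes BRW_NEW_BATCH, brw_upload_constant_buffer re-runs after every
     * batchbuffer flush, so a curbe_bo reference left over from the previous
     * batch gets refreshed without brw_finish_batch unreferencing it.
     * Other mesa/brw/cache dirty bits omitted.
     */
    const struct brw_tracked_state brw_constant_buffer = {
       .dirty = {
          .brw = BRW_NEW_BATCH,
       },
       .emit = brw_upload_constant_buffer,
    };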

No significant performance difference on glamor copywinwin10 (n=55),
despite that test having a 98% hit rate on the cache.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Eric Anholt 2014-05-20 11:54:26 -07:00 committed by Kenneth Graunke
parent 44c63bdd40
commit ccda1b9ba9
4 changed files with 2 additions and 50 deletions


@@ -757,11 +757,6 @@ brwCreateContext(gl_api api,
    brw->prim_restart.enable_cut_index = false;
    brw->gs.enabled = false;

-   if (brw->gen < 6) {
-      brw->curbe.last_buf = calloc(1, 4096);
-      brw->curbe.next_buf = calloc(1, 4096);
-   }

    ctx->VertexProgram._MaintainTnlProgram = true;
    ctx->FragmentProgram._MaintainTexEnvProgram = true;
@@ -825,9 +820,6 @@ intelDestroyContext(__DRIcontext * driContextPriv)
    drm_intel_bo_unreference(brw->curbe.curbe_bo);
-   free(brw->curbe.last_buf);
-   free(brw->curbe.next_buf);
    drm_intel_gem_context_destroy(brw->hw_ctx);

    if (ctx->swrast_context) {


@@ -1234,20 +1234,6 @@ struct brw_context
       drm_intel_bo *curbe_bo;
       /** Offset within curbe_bo of space for current curbe entry */
       GLuint curbe_offset;
-      /**
-       * Copy of the last set of CURBEs uploaded.  Frequently we'll end up
-       * in brw_curbe.c with the same set of constant data to be uploaded,
-       * so we'd rather not upload new constants in that case (it can cause
-       * a pipeline bubble since only up to 4 can be pipelined at a time).
-       */
-      GLfloat *last_buf;
-      /**
-       * Allocation for where to calculate the next set of CURBEs.
-       * It's a hot enough path that malloc/free of that data matters.
-       */
-      GLfloat *next_buf;
-      GLuint last_bufsz;
    } curbe;

    /**


@@ -188,11 +188,11 @@ brw_upload_constant_buffer(struct brw_context *brw)
    gl_clip_plane *clip_planes;

    if (sz == 0) {
-      brw->curbe.last_bufsz = 0;
       goto emit;
    }

-   buf = brw->curbe.next_buf;
+   buf = intel_upload_space(brw, bufsz, 64,
+                            &brw->curbe.curbe_bo, &brw->curbe.curbe_offset);

    /* fragment shader constants */
    if (brw->curbe.wm_size) {
@@ -246,27 +246,6 @@ brw_upload_constant_buffer(struct brw_context *brw)
       for (i = 0; i < sz*16; i+=4)
          fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
                  buf[i+0], buf[i+1], buf[i+2], buf[i+3]);

-      fprintf(stderr, "last_buf %p buf %p sz %d/%d cmp %d\n",
-              brw->curbe.last_buf, buf,
-              bufsz, brw->curbe.last_bufsz,
-              brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
    }

-   if (brw->curbe.curbe_bo != NULL &&
-       bufsz == brw->curbe.last_bufsz &&
-       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
-      /* constants have not changed */
-   } else {
-      /* Update the record of what our last set of constants was.  We
-       * don't just flip the pointers because we don't fill in the
-       * data in the padding between the entries.
-       */
-      memcpy(brw->curbe.last_buf, buf, bufsz);
-      brw->curbe.last_bufsz = bufsz;
-      intel_upload_data(brw, buf, bufsz, 64,
-                        &brw->curbe.curbe_bo, &brw->curbe.curbe_offset);
-   }

    /* Because this provokes an action (ie copy the constants into the


@@ -224,11 +224,6 @@ brw_finish_batch(struct brw_context *brw)
    if (brw->batch.ring == RENDER_RING)
       brw_perf_monitor_finish_batch(brw);

-   if (brw->curbe.curbe_bo) {
-      drm_intel_bo_unreference(brw->curbe.curbe_bo);
-      brw->curbe.curbe_bo = NULL;
-   }

    /* Mark that the current program cache BO has been used by the GPU.
     * It will be reallocated if we need to put new programs in for the
     * next batch.