Merge branch 'master' into autoconf2

2026-03-04 21:10:31 +01:00 · 2007-12-12 21:24:06 -08:00 · 2007-12-12 21:24:06 -08:00 · f116634933
commit f116634933
parent c79c93c0ac 37406c2038
63 changed files with 1510 additions and 2895 deletions
--- a/progs/tests/Makefile
+++ b/progs/tests/Makefile
@ -60,6 +60,7 @@ SOURCES = \
 	readrate.c \
 	seccolor.c \
 	sharedtex.c \
+	stencil_twoside.c \
 	stencilwrap.c \
 	stencil_wrap.c \
 	subtexrate.c \
--- a/progs/tests/stencil_twoside.c
+++ b/progs/tests/stencil_twoside.c
@ -0,0 +1,299 @@
+/*
+ * (C) Copyright IBM Corporation 2004
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file stencil_twoside.c
+ * 
+ * Simple test of GL_ATI_separate_stencil (or the OpenGL 2.0 equivalent)
+ * functionality.  A reference square and four test squares are drawn; the
+ * test squares use different two-sided stencil modes, but all five squares
+ * should be rendered with the same final color.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <GL/glut.h>
+
+/* Non-zero: Display() sets the stencil function through the OpenGL 2.0
+ * glStencilFuncSeparate entry point; zero: through the
+ * GL_ATI_separate_stencil one.  Cleared by Init() when the reported GL
+ * version is below 2.0.
+ */
+static int use20syntax = 1;
+/* Initial window size given to glutInitWindowSize(); updated by Reshape(). */
+static int Width = 550;
+static int Height = 200;
+/* Near and far clip distances passed to glFrustum() in Reshape(). */
+static const GLfloat Near = 5.0, Far = 25.0;
+
+
+/* Entry points resolved at run time with glutGetProcAddress() in Init(). */
+static PFNGLSTENCILFUNCSEPARATEPROC stencil_func_separate = NULL;
+static PFNGLSTENCILFUNCSEPARATEATIPROC stencil_func_separate_ati = NULL;
+static PFNGLSTENCILOPSEPARATEPROC stencil_op_separate = NULL;
+
+/**
+ * GLUT display callback.
+ *
+ * Clears the stencil buffer to 1, draws a grey reference square with the
+ * stencil test disabled, then draws four test squares, each 3 units further
+ * along +x.  Every test square is done in two steps: first a light (0.9)
+ * quad is drawn many times with per-face stencil operations to drive the
+ * stencil value, then a grey (0.5) quad is drawn with GL_EQUAL against the
+ * stencil value that step is expected to have produced.  If the per-face
+ * state works, the grey quad covers the light one and the square matches
+ * the reference.
+ */
+static void Display( void )
+{
+   GLint  max_stencil;
+   GLint  stencil_bits;
+   unsigned i;
+
+
+   /* Largest value representable in the stencil buffer. */
+   glGetIntegerv( GL_STENCIL_BITS, & stencil_bits );
+   max_stencil = (1U << stencil_bits) - 1;
+   printf( "Stencil bits = %u, maximum stencil value = 0x%08x\n",
+	   stencil_bits, max_stencil );
+
+   /* Every pixel starts with a stencil value of 1. */
+   glClearStencil( 1 );
+   glClearColor( 0.2, 0.2, 0.8, 0 );
+   glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT 
+	    | GL_STENCIL_BUFFER_BIT );
+
+
+   glPushMatrix();
+
+   /* This is the "reference" square: drawn without any stencil testing.
+    */
+
+   glDisable(GL_STENCIL_TEST);
+   glTranslatef(-6.0, 0, 0);
+   glBegin(GL_QUADS);
+   glColor3f( 0.5, 0.5, 0.5 );
+   glVertex2f(-1, -1);
+   glVertex2f( 1, -1);
+   glVertex2f( 1,  1);
+   glVertex2f(-1,  1);
+   glEnd();
+
+
+   glEnable(GL_STENCIL_TEST);
+
+   /* Draw the first two squares using incr for the affected face.
+    *
+    * Test square 1: front faces increment, back faces decrement.  Only
+    * front-facing quads are drawn, more than max_stencil times, so the
+    * stencil value is expected to end up at max_stencil.
+    */
+
+   if (use20syntax) {
+      stencil_func_separate(GL_FRONT, GL_ALWAYS, 0, ~0);
+      stencil_func_separate(GL_BACK, GL_ALWAYS, 0, ~0);
+   }
+   else {
+      stencil_func_separate_ati(GL_ALWAYS, GL_ALWAYS, 0, ~0);
+   }
+   stencil_op_separate(GL_FRONT, GL_KEEP, GL_KEEP, GL_INCR);
+   stencil_op_separate(GL_BACK, GL_KEEP, GL_KEEP, GL_DECR);
+
+   glTranslatef(3.0, 0, 0);
+   glBegin(GL_QUADS);
+   glColor3f( 0.9, 0.9, 0.9 );
+   /* this should be front facing */
+   for ( i = 0 ; i < (max_stencil + 5) ; i++ ) {
+      glVertex2f(-1, -1);
+      glVertex2f( 1, -1);
+      glVertex2f( 1,  1);
+      glVertex2f(-1,  1);
+   }
+   glEnd();
+
+   /* Overdraw with the reference color wherever stencil == max_stencil. */
+   glStencilFunc(GL_EQUAL, max_stencil, ~0);
+   glBegin(GL_QUADS);
+   glColor3f( 0.5, 0.5, 0.5 );
+   glVertex2f(-1, -1);
+   glVertex2f( 1, -1);
+   glVertex2f( 1,  1);
+   glVertex2f(-1,  1);
+   glEnd();
+
+
+   /* Test square 2: the operations are swapped (front decrements, back
+    * increments) and only back-facing quads are drawn, so the stencil value
+    * is again expected to reach max_stencil.
+    */
+   if (use20syntax) {
+      stencil_func_separate(GL_FRONT, GL_ALWAYS, 0, ~0);
+      stencil_func_separate(GL_BACK, GL_ALWAYS, 0, ~0);
+   }
+   else {
+      stencil_func_separate_ati(GL_ALWAYS, GL_ALWAYS, 0, ~0);
+   }
+   stencil_op_separate(GL_FRONT, GL_KEEP, GL_KEEP, GL_DECR);
+   stencil_op_separate(GL_BACK, GL_KEEP, GL_KEEP, GL_INCR);
+
+   glTranslatef(3.0, 0, 0);
+   glBegin(GL_QUADS);
+   glColor3f( 0.9, 0.9, 0.9 );
+
+   /* this should be back facing */
+   for ( i = 0 ; i < (max_stencil + 5) ; i++ ) {
+      glVertex2f(-1, -1);
+      glVertex2f(-1,  1);
+      glVertex2f( 1,  1);
+      glVertex2f( 1, -1);
+   }
+   glEnd();
+
+   glStencilFunc(GL_EQUAL, max_stencil, ~0);
+   glBegin(GL_QUADS);
+   glColor3f( 0.5, 0.5, 0.5 );
+   glVertex2f(-1, -1);
+   glVertex2f( 1, -1);
+   glVertex2f( 1,  1);
+   glVertex2f(-1,  1);
+   glEnd();
+
+   /* Test square 3: the front-face stencil function is GL_NEVER and the
+    * stencil-fail operation is GL_KEEP, so front-facing quads should leave
+    * the stencil value alone while back-facing quads increment it.  The
+    * expected final value is max_stencil.
+    */
+   if (use20syntax) {
+      stencil_func_separate(GL_FRONT, GL_NEVER, 0, ~0);
+      stencil_func_separate(GL_BACK, GL_ALWAYS, 0, ~0);
+   }
+   else {
+      stencil_func_separate_ati(GL_NEVER, GL_ALWAYS, 0, ~0);
+   }
+   stencil_op_separate(GL_FRONT, GL_KEEP, GL_KEEP, GL_DECR);
+   stencil_op_separate(GL_BACK, GL_KEEP, GL_KEEP, GL_INCR);
+
+   glTranslatef(3.0, 0, 0);
+   glBegin(GL_QUADS);
+   glColor3f( 0.9, 0.9, 0.9 );
+
+   /* each iteration draws one back-facing and one front-facing quad */
+   for ( i = 0 ; i < (max_stencil + 5) ; i++ ) {
+   /* this should be back facing */
+      glVertex2f(-1, -1);
+      glVertex2f(-1,  1);
+      glVertex2f( 1,  1);
+      glVertex2f( 1, -1);
+   /* this should be front facing */
+      glVertex2f(-1, -1);
+      glVertex2f( 1, -1);
+      glVertex2f( 1,  1);
+      glVertex2f(-1,  1);
+   }
+   glEnd();
+
+   glStencilFunc(GL_EQUAL, max_stencil, ~0);
+   glBegin(GL_QUADS);
+   glColor3f( 0.5, 0.5, 0.5 );
+   glVertex2f(-1, -1);
+   glVertex2f( 1, -1);
+   glVertex2f( 1,  1);
+   glVertex2f(-1,  1);
+   glEnd();
+
+   /* Test square 4: both faces pass the stencil test; each back-facing quad
+    * increments and the following front-facing quad decrements, so the
+    * stencil value is expected to finish at the clear value of 1.
+    */
+   if (use20syntax) {
+      stencil_func_separate(GL_FRONT, GL_ALWAYS, 0, ~0);
+      stencil_func_separate(GL_BACK, GL_ALWAYS, 0, ~0);
+   }
+   else {
+      stencil_func_separate_ati(GL_ALWAYS, GL_ALWAYS, 0, ~0);
+   }
+   stencil_op_separate(GL_FRONT, GL_KEEP, GL_KEEP, GL_DECR);
+   stencil_op_separate(GL_BACK, GL_KEEP, GL_KEEP, GL_INCR);
+
+   glTranslatef(3.0, 0, 0);
+   glBegin(GL_QUADS);
+   glColor3f( 0.9, 0.9, 0.9 );
+
+   /* each iteration draws one back-facing and one front-facing quad */
+   for ( i = 0 ; i < (max_stencil + 5) ; i++ ) {
+   /* this should be back facing */
+      glVertex2f(-1, -1);
+      glVertex2f(-1,  1);
+      glVertex2f( 1,  1);
+      glVertex2f( 1, -1);
+   /* this should be front facing */
+      glVertex2f(-1, -1);
+      glVertex2f( 1, -1);
+      glVertex2f( 1,  1);
+      glVertex2f(-1,  1);
+   }
+   glEnd();
+
+   /* Here the expected stencil value is the clear value, not max_stencil. */
+   glStencilFunc(GL_EQUAL, 1, ~0);
+   glBegin(GL_QUADS);
+   glColor3f( 0.5, 0.5, 0.5 );
+   glVertex2f(-1, -1);
+   glVertex2f( 1, -1);
+   glVertex2f( 1,  1);
+   glVertex2f(-1,  1);
+   glEnd();
+
+   glPopMatrix();
+
+   glutSwapBuffers();
+}
+
+
+/**
+ * GLUT reshape callback.
+ *
+ * Records the new window size, resets the viewport to cover the whole
+ * window, and rebuilds the projection (a frustum scaled by the window's
+ * aspect ratio) and modelview (a translation of -15 along z) matrices.
+ *
+ * \param width   new window width in pixels
+ * \param height  new window height in pixels
+ */
+static void Reshape( int width, int height )
+{
+   const GLfloat aspect = (float) width / (float) height;
+
+   /* Remember the size for later use. */
+   Width = width;
+   Height = height;
+
+   glViewport( 0, 0, width, height );
+
+   /* Projection: horizontal extent follows the aspect ratio. */
+   glMatrixMode( GL_PROJECTION );
+   glLoadIdentity();
+   glFrustum( -aspect, aspect, -1.0, 1.0, Near, Far );
+
+   /* Modelview: move the scene between the near and far planes. */
+   glMatrixMode( GL_MODELVIEW );
+   glLoadIdentity();
+   glTranslatef( 0.0, 0.0, -15.0 );
+}
+
+
+/**
+ * GLUT keyboard callback: Escape (key code 27) exits the program; any other
+ * key requests a redraw.
+ *
+ * \param key  character code of the pressed key
+ * \param x    unused
+ * \param y    unused
+ */
+static void Key( unsigned char key, int x, int y )
+{
+   (void) x;
+   (void) y;
+
+   if (key == 27) {
+      exit(0);
+   }
+
+   glutPostRedisplay();
+}
+
+
+/**
+ * One-time setup, called after the window (and so the GL context) exists.
+ *
+ * Prints the renderer and version strings, exits with status 1 unless
+ * either GL_ATI_separate_stencil or OpenGL 2.0 is available, selects the
+ * call syntax Display() will use (use20syntax), and resolves the
+ * separate-stencil entry points.  Exits with status 1 if an entry point
+ * that Display() is going to call could not be resolved.
+ */
+static void Init( void )
+{
+   const char * const ver_string = (const char * const)
+       glGetString( GL_VERSION );
+   /* atof() reads the leading "major.minor" of the version string.  It must
+    * not be handed a NULL pointer, so treat a missing string as version 0.
+    */
+   const double gl_version = (ver_string != NULL) ? atof( ver_string ) : 0.0;
+
+   printf("GL_RENDERER = %s\n", (char *) glGetString(GL_RENDERER));
+   printf("GL_VERSION = %s\n", (ver_string != NULL) ? ver_string : "(null)");
+
+   if ( !glutExtensionSupported("GL_ATI_separate_stencil") 
+	&& (gl_version < 2.0) ) {
+      printf("Sorry, this program requires either GL_ATI_separate_stencil or OpenGL 2.0.\n");
+      exit(1);
+   }
+   if (gl_version < 2.0) {
+      use20syntax = 0;
+   }
+   stencil_func_separate = glutGetProcAddress( "glStencilFuncSeparate" );
+   stencil_func_separate_ati = glutGetProcAddress( "glStencilFuncSeparateATI" );
+
+   /* Display() always sets the stencil operations through
+    * stencil_op_separate.  Before OpenGL 2.0 only the ATI entry point is
+    * guaranteed to exist; it takes the same arguments as the 2.0 one, so it
+    * can be stored in the same pointer.
+    */
+   stencil_op_separate = glutGetProcAddress( use20syntax
+					     ? "glStencilOpSeparate"
+					     : "glStencilOpSeparateATI" );
+
+   /* Fail cleanly instead of calling through a NULL pointer in Display(). */
+   if ( stencil_op_separate == NULL
+	|| (use20syntax ? (stencil_func_separate == NULL)
+			: (stencil_func_separate_ati == NULL)) ) {
+      printf("Sorry, the separate stencil entry points could not be resolved.\n");
+      exit(1);
+   }
+
+   printf("\nAll 5 squares should be the same color.\n");
+   glEnable( GL_BLEND );
+}
+
+
+/**
+ * Program entry point: creates a double-buffered RGB window with depth and
+ * stencil buffers, installs the GLUT callbacks, runs Init() and enters the
+ * GLUT main loop.
+ */
+int main( int argc, char *argv[] )
+{
+   glutInit( &argc, argv );
+
+   /* Window setup; the stencil buffer is what this test exercises. */
+   glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH | GLUT_STENCIL );
+   glutInitWindowSize( Width, Height );
+   glutInitWindowPosition( 0, 0 );
+   glutCreateWindow( "GL_ATI_separate_stencil test" );
+
+   /* Callbacks. */
+   glutDisplayFunc( Display );
+   glutReshapeFunc( Reshape );
+   glutKeyboardFunc( Key );
+
+   /* Needs the GL context created by glutCreateWindow(). */
+   Init();
+
+   glutMainLoop();
+   return 0;
+}
--- a/src/mesa/drivers/dri/common/dri_bufmgr.h
+++ b/src/mesa/drivers/dri/common/dri_bufmgr.h
@ -203,6 +203,10 @@ dri_bufmgr *dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual,
 						   unsigned int cookie),
 				 void *driver_priv);
 void dri_bufmgr_fake_set_debug(dri_bufmgr *bufmgr, GLboolean enable_debug);
+void dri_bo_fake_disable_backing_store(dri_bo *bo,
+				       void (*invalidate_cb)(dri_bo *bo,
+							     void *ptr),
+				       void *ptr);
 void dri_bufmgr_destroy(dri_bufmgr *bufmgr);
 dri_bo *dri_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
 				      unsigned int handle);
--- a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c
+++ b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c
@ -181,7 +181,7 @@ typedef struct _dri_bo_fake {

   struct block *block;
   void *backing_store;
-   void (*invalidate_cb)(dri_bufmgr *bufmgr, void * );
+   void (*invalidate_cb)(dri_bo *bo, void *ptr);
   void *invalidate_ptr;
 } dri_bo_fake;

@ -318,9 +318,9 @@ static void
 free_backing_store(dri_bo *bo)
 {
   dri_bo_fake *bo_fake = (dri_bo_fake *)bo;
-   assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE)));

   if (bo_fake->backing_store) {
+      assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE)));
      ALIGN_FREE(bo_fake->backing_store);
      bo_fake->backing_store = NULL;
   }
@ -332,8 +332,8 @@ set_dirty(dri_bo *bo)
   dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr;
   dri_bo_fake *bo_fake = (dri_bo_fake *)bo;

-   if (bo_fake->flags & BM_NO_BACKING_STORE)
-      bo_fake->invalidate_cb(&bufmgr_fake->bufmgr, bo_fake->invalidate_ptr);
+   if (bo_fake->flags & BM_NO_BACKING_STORE && bo_fake->invalidate_cb != NULL)
+      bo_fake->invalidate_cb(bo, bo_fake->invalidate_ptr);

   assert(!(bo_fake->flags & BM_PINNED));

@ -677,6 +677,40 @@ dri_fake_bo_unreference(dri_bo *bo)
   _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
 }

+/**
+ * Set the buffer as not requiring backing store, and instead get the callback
+ * invoked whenever it would be set dirty.
+ *
+ * Any existing backing store is freed, BM_NO_BACKING_STORE is set in the
+ * buffer's flags, and the buffer is marked dirty.  The whole operation runs
+ * with the buffer manager's mutex held.
+ *
+ * \param bo             buffer to change; treated as a dri_bo_fake
+ * \param invalidate_cb  callback to store on the buffer, or NULL for none.
+ *                       If non-NULL it is also invoked once before this
+ *                       function returns.
+ * \param ptr            opaque pointer stored with the callback and passed
+ *                       back as its second argument
+ */
+void dri_bo_fake_disable_backing_store(dri_bo *bo,
+				       void (*invalidate_cb)(dri_bo *bo,
+							     void *ptr),
+				       void *ptr)
+{
+   dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr;
+   dri_bo_fake *bo_fake = (dri_bo_fake *)bo;
+
+   _glthread_LOCK_MUTEX(bufmgr_fake->mutex);
+
+   /* Drop the system-memory copy, if there is one. */
+   if (bo_fake->backing_store)
+      free_backing_store(bo);
+
+   bo_fake->flags |= BM_NO_BACKING_STORE;
+
+   DBG("disable_backing_store set buf %d dirty\n", bo_fake->id);
+   bo_fake->dirty = 1;
+   /* Remember the callback and its argument on the buffer. */
+   bo_fake->invalidate_cb = invalidate_cb;
+   bo_fake->invalidate_ptr = ptr;
+
+   /* Note that it is invalid right from the start.  Also note
+    * invalidate_cb is called with the bufmgr locked, so cannot
+    * itself make bufmgr calls.
+    */
+   if (invalidate_cb != NULL)
+      invalidate_cb(bo, ptr);
+
+   _glthread_UNLOCK_MUTEX(bufmgr_fake->mutex);
+}
+
 /**
 * Map a buffer into bo->virtual, allocating either card memory space (If
 * BM_NO_BACKING_STORE or BM_PINNED) or backing store, as necessary.
@ -928,6 +962,7 @@ dri_fake_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta,
   struct fake_buffer_reloc *r = &bufmgr_fake->reloc[bufmgr_fake->nr_relocs++];
   dri_bo_fake *target_fake = (dri_bo_fake *)target_buf;
   dri_bo_fake *reloc_fake = (dri_bo_fake *)reloc_buf;
+   int i;

   assert(bufmgr_fake->nr_relocs <= MAX_RELOCS);

@ -953,6 +988,17 @@ dri_fake_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta,
   r->delta = delta;
   r->validate_flags = flags;

+   /* Check that a conflicting relocation hasn't already been emitted. */
+   for (i = 0; i < bufmgr_fake->nr_relocs - 1; i++) {
+      struct fake_buffer_reloc *r2 = &bufmgr_fake->reloc[i];
+
+      assert(r->reloc_buf != r2->reloc_buf ||
+	     r->offset != r2->offset ||
+	     (r->target_buf == r2->target_buf &&
+	      r->delta == r2->delta &&
+	      r->validate_flags == r2->validate_flags));
+   }
+
   return;
 }

--- a/src/mesa/drivers/dri/i915/intel_context.c
+++ b/src/mesa/drivers/dri/i915/intel_context.c
@ -60,6 +60,7 @@
 #include "intel_buffer_objects.h"
 #include "intel_fbo.h"
 #include "intel_decode.h"
+#include "intel_bufmgr_ttm.h"

 #include "drirenderbuffer.h"
 #include "vblank.h"
@ -291,6 +292,81 @@ intelFinish(GLcontext * ctx)
   }
 }

+/**
+ * Driver-specific fence emit implementation for the fake memory manager.
+ *
+ * \param private  opaque pointer, used as the struct intel_context
+ * \return the value returned by intelEmitIrqLocked(), as an unsigned int
+ */
+static unsigned int
+intel_fence_emit(void *private)
+{
+   /* XXX: Need to emit a flush, if we haven't already (at least with the
+    * current batchbuffer implementation, we have).
+    */
+   return intelEmitIrqLocked((struct intel_context *)private);
+}
+
+/**
+ * Driver-specific fence wait implementation for the fake memory manager.
+ *
+ * \param private  opaque pointer, used as the struct intel_context
+ * \param cookie   value handed on to intelWaitIrq()
+ * \return always 0
+ */
+static int
+intel_fence_wait(void *private, unsigned int cookie)
+{
+   struct intel_context *ctx = (struct intel_context *)private;
+
+   intelWaitIrq(ctx, cookie);
+   return 0;
+}
+
+/**
+ * Create the buffer manager for a context and store it in intel->bufmgr.
+ *
+ * The TTM buffer manager is tried first, unless the INTEL_NO_TTM environment
+ * variable is set or the DDX/DRM versions or the front buffer handle rule it
+ * out.  If that does not yield a buffer manager, the classic (fake) one is
+ * created over the screen's texture area.  intel->ttm records which of the
+ * two is in use.
+ *
+ * \param intel  context being initialized; intel->intelScreen and
+ *               intel->driFd must already be set
+ * \return GL_TRUE when intel->bufmgr holds a usable buffer manager,
+ *         GL_FALSE when neither buffer manager could be set up
+ */
+static GLboolean
+intel_init_bufmgr(struct intel_context *intel)
+{
+   intelScreenPrivate *intelScreen = intel->intelScreen;
+   GLboolean ttm_disable = getenv("INTEL_NO_TTM") != NULL;
+
+   /* If we've got a new enough DDX that's initializing TTM and giving us
+    * object handles for the shared buffers, use that.
+    */
+   intel->ttm = GL_FALSE;
+   if (!ttm_disable &&
+       intelScreen->driScrnPriv->ddx_version.minor >= 9 &&
+       intelScreen->drmMinor >= 11 &&
+       intelScreen->front.bo_handle != -1)
+   {
+      intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd,
+					    DRM_FENCE_TYPE_EXE,
+					    DRM_FENCE_TYPE_EXE |
+					    DRM_I915_FENCE_TYPE_RW,
+					    BATCH_SZ);
+      if (intel->bufmgr != NULL)
+	 intel->ttm = GL_TRUE;
+   }
+   /* Otherwise, use the classic buffer manager.
+    * NOTE(review): when the TTM branch is skipped this test reads
+    * intel->bufmgr as the caller left it, so it relies on the context
+    * having been zero-initialized -- confirm against the allocator.
+    */
+   if (intel->bufmgr == NULL) {
+      if (ttm_disable) {
+	 fprintf(stderr, "TTM buffer manager disabled.  Using classic.\n");
+      } else {
+	 fprintf(stderr, "Failed to initialize TTM buffer manager.  "
+		 "Falling back to classic.\n");
+      }
+
+      /* The classic manager needs a texture area to allocate from. */
+      if (intelScreen->tex.size == 0) {
+	 fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n",
+		 __func__, __LINE__);
+	 return GL_FALSE;
+      }
+
+      intel->bufmgr = dri_bufmgr_fake_init(intelScreen->tex.offset,
+					   intelScreen->tex.map,
+					   intelScreen->tex.size,
+					   intel_fence_emit,
+					   intel_fence_wait,
+					   intel);
+      /* Do not report success without a buffer manager. */
+      if (intel->bufmgr == NULL) {
+	 fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n",
+		 __func__, __LINE__);
+	 return GL_FALSE;
+      }
+   }
+
+   return GL_TRUE;
+}

 void
 intelInitDriverFunctions(struct dd_function_table *functions)
@ -338,9 +414,22 @@ intelInitContext(struct intel_context *intel,
   intel->driScreen = sPriv;
   intel->sarea = saPriv;

+   /* Dri stuff */
+   intel->hHWContext = driContextPriv->hHWContext;
+   intel->driFd = sPriv->fd;
+   intel->driHwLock = (drmLock *) & sPriv->pSAREA->lock;
+
   intel->width = intelScreen->width;
   intel->height = intelScreen->height;

+   if (intelScreen->deviceID == PCI_CHIP_I865_G)
+      intel->maxBatchSize = 4096;
+   else
+      intel->maxBatchSize = BATCH_SZ;
+
+   if (!intel_init_bufmgr(intel))
+      return GL_FALSE;
+
   if (!lockMutexInit) {
      lockMutexInit = GL_TRUE;
      _glthread_INIT_MUTEX(lockMutex);
@ -391,11 +480,6 @@ intelInitContext(struct intel_context *intel,
   _swrast_allow_pixel_fog(ctx, GL_FALSE);
   _swrast_allow_vertex_fog(ctx, GL_TRUE);

-   /* Dri stuff */
-   intel->hHWContext = driContextPriv->hHWContext;
-   intel->driFd = sPriv->fd;
-   intel->driHwLock = (drmLock *) & sPriv->pSAREA->lock;
-
   intel->hw_stipple = 1;

   /* XXX FBO: this doesn't seem to be used anywhere */
@ -436,9 +520,10 @@ intelInitContext(struct intel_context *intel,
 /* 		      GL_TRUE, */
                     GL_FALSE);

-   if (intelScreen->ttm)
+   if (intel->ttm)
      driInitExtensions(ctx, ttm_extensions, GL_FALSE);

+   intel_recreate_static_regions(intel);

   intel->batch = intel_batchbuffer_alloc(intel);
   intel->last_swap_fence = NULL;
@ -457,11 +542,10 @@ intelInitContext(struct intel_context *intel,

   intel->prim.primitive = ~0;

-
 #if DO_DEBUG
   INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
-   if (!intel->intelScreen->ttm && (INTEL_DEBUG & DEBUG_BUFMGR))
-      dri_bufmgr_fake_set_debug(intel->intelScreen->bufmgr, GL_TRUE);
+   if (!intel->ttm && (INTEL_DEBUG & DEBUG_BUFMGR))
+      dri_bufmgr_fake_set_debug(intel->bufmgr, GL_TRUE);
 #endif

   if (getenv("INTEL_NO_RAST")) {
@ -507,6 +591,7 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
 	 intel->first_swap_fence = NULL;
      }

+      dri_bufmgr_destroy(intel->bufmgr);

      if (release_texture_heaps) {
         /* This share group is about to go away, free our private
@ -551,21 +636,21 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv,

         if (intel_fb->color_rb[0] && !intel_fb->color_rb[0]->region) {
            intel_region_reference(&intel_fb->color_rb[0]->region,
-				   intel->intelScreen->front_region);
+				   intel->front_region);
         }
         if (intel_fb->color_rb[1] && !intel_fb->color_rb[1]->region) {
            intel_region_reference(&intel_fb->color_rb[1]->region,
-				   intel->intelScreen->back_region);
+				   intel->back_region);
         }
         if (intel_fb->color_rb[2] && !intel_fb->color_rb[2]->region) {
            intel_region_reference(&intel_fb->color_rb[2]->region,
-				   intel->intelScreen->third_region);
+				   intel->third_region);
         }
         if (irbDepth && !irbDepth->region) {
-            intel_region_reference(&irbDepth->region, intel->intelScreen->depth_region);
+            intel_region_reference(&irbDepth->region, intel->depth_region);
         }
         if (irbStencil && !irbStencil->region) {
-            intel_region_reference(&irbStencil->region, intel->intelScreen->depth_region);
+            intel_region_reference(&irbStencil->region, intel->depth_region);
         }
      }

@ -618,7 +703,6 @@ intelContendedLock(struct intel_context *intel, GLuint flags)
 {
   __DRIdrawablePrivate *dPriv = intel->driDrawable;
   __DRIscreenPrivate *sPriv = intel->driScreen;
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private;
   drmI830Sarea *sarea = intel->sarea;

   drmGetLock(intel->driFd, intel->hHWContext, flags);
@ -639,9 +723,9 @@ intelContendedLock(struct intel_context *intel, GLuint flags)
    * between contexts of a single fake bufmgr, but this will at least make
    * things correct for now.
    */
-   if (!intel->intelScreen->ttm && sarea->texAge != intel->hHWContext) {
+   if (!intel->ttm && sarea->texAge != intel->hHWContext) {
      sarea->texAge = intel->hHWContext;
-      dri_bufmgr_fake_contended_lock_take(intel->intelScreen->bufmgr);
+      dri_bufmgr_fake_contended_lock_take(intel->bufmgr);
      if (INTEL_DEBUG & DEBUG_BATCH)
 	 intel_decode_context_reset();
   }
--- a/src/mesa/drivers/dri/i915/intel_context.h
+++ b/src/mesa/drivers/dri/i915/intel_context.h
@ -34,6 +34,7 @@
 #include "drm.h"
 #include "mm.h"
 #include "texmem.h"
+#include "dri_bufmgr.h"

 #include "intel_screen.h"
 #include "intel_tex_obj.h"
@ -135,16 +136,32 @@ struct intel_context

      void (*assert_not_dirty) (struct intel_context *intel);

+      void (*debug_batch)(struct intel_context *intel);
   } vtbl;

   GLint refcount;
   GLuint Fallback;
   GLuint NewGLState;

+   dri_bufmgr *bufmgr;
+   unsigned int maxBatchSize;
+
+   struct intel_region *front_region;
+   struct intel_region *back_region;
+   struct intel_region *third_region;
+   struct intel_region *depth_region;
+
+   /**
+    * This value indicates that the kernel memory manager is being used
+    * instead of the fake client-side memory manager.
+    */
+   GLboolean ttm;
+
   dri_fence *last_swap_fence;
   dri_fence *first_swap_fence;

   struct intel_batchbuffer *batch;
+   unsigned batch_id;
   GLuint last_state_batch_id;

   struct
--- a/src/mesa/drivers/dri/i915/intel_ioctl.c
+++ b/src/mesa/drivers/dri/i915/intel_ioctl.c
@ -47,15 +47,14 @@
 #define FILE_DEBUG_FLAG DEBUG_IOCTL

 int
-intelEmitIrqLocked(intelScreenPrivate *intelScreen)
+intelEmitIrqLocked(struct intel_context *intel)
 {
   drmI830IrqEmit ie;
   int ret, seq;

   ie.irq_seq = &seq;

-   ret = drmCommandWriteRead(intelScreen->driScrnPriv->fd,
-			     DRM_I830_IRQ_EMIT, &ie, sizeof(ie));
+   ret = drmCommandWriteRead(intel->driFd, DRM_I830_IRQ_EMIT, &ie, sizeof(ie));
   if (ret) {
      fprintf(stderr, "%s: drmI830IrqEmit: %d\n", __FUNCTION__, ret);
      exit(1);
@ -67,7 +66,7 @@ intelEmitIrqLocked(intelScreenPrivate *intelScreen)
 }

 void
-intelWaitIrq(intelScreenPrivate *intelScreen, int seq)
+intelWaitIrq(struct intel_context *intel, int seq)
 {
   drm_i915_irq_wait_t iw;
   int ret;
@ -77,8 +76,7 @@ intelWaitIrq(intelScreenPrivate *intelScreen, int seq)
   iw.irq_seq = seq;

   do {
-      ret = drmCommandWrite(intelScreen->driScrnPriv->fd,
-			    DRM_I830_IRQ_WAIT, &iw, sizeof(iw));
+      ret = drmCommandWrite(intel->driFd, DRM_I830_IRQ_WAIT, &iw, sizeof(iw));
   } while (ret == -EAGAIN || ret == -EINTR);

   if (ret) {
@ -170,7 +168,7 @@ intel_exec_ioctl(struct intel_context *intel,
   }


-   fo = intel_ttm_fence_create_from_arg(intel->intelScreen->bufmgr, "fence buffers",
+   fo = intel_ttm_fence_create_from_arg(intel->bufmgr, "fence buffers",
 					&execbuf.fence_arg);
   if (!fo) {
      fprintf(stderr, "failed to fence handle: %08x\n", execbuf.fence_arg.handle);
--- a/src/mesa/drivers/dri/i915/intel_ioctl.h
+++ b/src/mesa/drivers/dri/i915/intel_ioctl.h
@ -30,8 +30,8 @@

 #include "intel_context.h"

-void intelWaitIrq(intelScreenPrivate *intelScreen, int seq);
-int intelEmitIrqLocked(intelScreenPrivate *intelScreen);
+void intelWaitIrq(struct intel_context *intel, int seq);
+int intelEmitIrqLocked(struct intel_context *intel);

 void intel_batch_ioctl(struct intel_context *intel,
                       GLuint start_offset,
--- a/src/mesa/drivers/dri/i915/intel_pixel_copy.c
+++ b/src/mesa/drivers/dri/i915/intel_pixel_copy.c
@ -54,9 +54,8 @@ copypix_src_region(struct intel_context *intel, GLenum type)
   case GL_DEPTH:
      /* Don't think this is really possible execpt at 16bpp, when we have no stencil.
       */
-      if (intel->intelScreen->depth_region && 
-	  intel->intelScreen->depth_region->cpp == 2)
-         return intel->intelScreen->depth_region;
+      if (intel->depth_region && intel->depth_region->cpp == 2)
+         return intel->depth_region;
   case GL_STENCIL:
      /* Don't think this is really possible. 
       */
@ -64,7 +63,7 @@ copypix_src_region(struct intel_context *intel, GLenum type)
   case GL_DEPTH_STENCIL_EXT:
      /* Does it matter whether it is stencil/depth or depth/stencil?
       */
-      return intel->intelScreen->depth_region;
+      return intel->depth_region;
   default:
      break;
   }
@ -164,7 +163,7 @@ do_texture_copypixels(GLcontext * ctx,

   /* Set the 3d engine to draw into the destination region:
    */
-   intel->vtbl.meta_draw_region(intel, dst, intel->intelScreen->depth_region);
+   intel->vtbl.meta_draw_region(intel, dst, intel->depth_region);

   intel->vtbl.meta_import_pixel_state(intel);

--- a/src/mesa/drivers/dri/i915/intel_pixel_draw.c
+++ b/src/mesa/drivers/dri/i915/intel_pixel_draw.c
@ -112,7 +112,7 @@ do_texture_drawpixels(GLcontext * ctx,

   /* Set the 3d engine to draw into the destination region:
    */
-   intel->vtbl.meta_draw_region(intel, dst, intel->intelScreen->depth_region);
+   intel->vtbl.meta_draw_region(intel, dst, intel->depth_region);

   intel->vtbl.meta_import_pixel_state(intel);

--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@ -5,11 +5,11 @@ include $(TOP)/configs/current
 LIBNAME = i965_dri.so

 DRIVER_SOURCES = \
-	bufmgr_fake.c \
 	intel_batchbuffer.c \
 	intel_blit.c \
 	intel_buffer_objects.c \
 	intel_buffers.c \
+	intel_bufmgr_ttm.c \
 	intel_context.c \
 	intel_decode.c \
 	intel_ioctl.c \
@ -53,6 +53,7 @@ DRIVER_SOURCES = \
 	brw_sf_state.c \
 	brw_state_batch.c \
 	brw_state_cache.c \
+	brw_state_dump.c \
 	brw_state_pool.c \
 	brw_state_upload.c \
 	brw_tex.c \
@ -80,6 +81,7 @@ DRIVER_SOURCES = \

 C_SOURCES = \
 	$(COMMON_SOURCES) \
+	$(COMMON_BM_SOURCES) \
 	$(MINIGLX_SOURCES) \
 	$(DRIVER_SOURCES)

--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@ -242,7 +242,7 @@ struct brw_surface_binding_table {
 struct brw_cache;

 struct brw_mem_pool {
-   struct buffer *buffer;
+   dri_bo *buffer;

   GLuint size;
   GLuint offset;		/* offset of first free byte */
@ -310,6 +310,8 @@ struct brw_state_pointers {
 struct brw_tracked_state {
   struct brw_state_flags dirty;
   void (*update)( struct brw_context *brw );
+   void (*emit_reloc)( struct brw_context *brw );
+   GLboolean always_update;
 };


@ -596,16 +598,17 @@ struct brw_context
      GLuint input_size_masks[4];


-      /* State structs
+      /**
+       * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER
+       * cache
       */
-      struct brw_sampler_default_color sdc[BRW_MAX_TEX_UNIT];
      struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];

      GLuint render_surf;
      GLuint nr_surfaces;      

      GLuint max_threads;
-      struct buffer *scratch_buffer;
+      dri_bo *scratch_buffer;
      GLuint scratch_buffer_size;

      GLuint sampler_count;
@ -659,6 +662,10 @@ void brw_init_state( struct brw_context *brw );
 void brw_destroy_state( struct brw_context *brw );


+/*======================================================================
+ * brw_state_dump.c
+ */
+void brw_debug_batch(struct intel_context *intel);

 /*======================================================================
 * brw_tex.c
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@ -312,11 +312,7 @@ static void upload_constant_buffer(struct brw_context *brw)

      /* Copy data to the buffer:
       */
-      bmBufferSubData(&brw->intel,
-		      pool->buffer,
-		      brw->curbe.gs_offset,
-		      bufsz,
-		      buf);
+      dri_bo_subdata(pool->buffer, brw->curbe.gs_offset, bufsz, buf);
   }

   /* Because this provokes an action (ie copy the constants into the
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@ -289,7 +289,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
   LOCK_HARDWARE(intel);

   if (brw->intel.numClipRects == 0) {
-      assert(intel->batch->ptr == intel->batch->map + intel->batch->offset);
+      assert(intel->batch->ptr == intel->batch->map);
      UNLOCK_HARDWARE(intel);
      return GL_TRUE;
   }
@ -358,14 +358,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
    * way around this, as not every flush is due to a buffer filling
    * up.
    */
-   if (!intel_batchbuffer_flush( brw->intel.batch )) {
-      DBG("%s intel_batchbuffer_flush failed\n", __FUNCTION__);
-      retval = GL_FALSE;
-   }
-
-   if (retval && intel->thrashing) {
-      bmSetFence(intel);
-   }
+   intel_batchbuffer_flush( brw->intel.batch );

   /* Free any old data so it doesn't clog up texture memory - we
    * won't be referencing it again.
@ -425,7 +418,6 @@ void brw_draw_prims( GLcontext *ctx,
 		     GLuint min_index,
 		     GLuint max_index )
 {
-   struct intel_context *intel = intel_context(ctx);
   GLboolean retval;

   /* Decide if we want to rebase.  If so we end up recursing once
@ -445,20 +437,6 @@ void brw_draw_prims( GLcontext *ctx,
    */
   retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);

-   
-   /* This looks like out-of-memory but potentially we have
-    * situation where there is enough memory but it has become
-    * fragmented.  Clear out all heaps and start from scratch by
-    * faking a contended lock event:  (done elsewhere)
-    */
-   if (!retval && !intel->Fallback && bmError(intel)) {
-      DBG("retrying\n");
-      /* Then try a second time only to upload textures and draw the
-       * primitives:
-       */
-      retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
-   }
-
   /* Otherwise, we really are out of memory.  Pass the drawing
    * command to the software tnl module and which will in turn call
    * swrast to do the drawing.
@ -469,13 +447,6 @@ void brw_draw_prims( GLcontext *ctx,
   }
 }

-
-static void brw_invalidate_vbo_cb( struct intel_context *intel, void *ptr )
-{
-   /* nothing to do, we don't rely on the contents being preserved */
-}
-
-
 void brw_draw_init( struct brw_context *brw )
 {
   GLcontext *ctx = &brw->intel.ctx;
@ -490,22 +461,25 @@ void brw_draw_init( struct brw_context *brw )

   for (i = 0; i < BRW_NR_UPLOAD_BUFS; i++) {
      brw->vb.upload.vbo[i] = ctx->Driver.NewBufferObject(ctx, 1, GL_ARRAY_BUFFER_ARB);
-      
-      /* NOTE:  These are set to no-backing-store.
-       */
-      bmBufferSetInvalidateCB(&brw->intel,
-			      intel_bufferobj_buffer(intel_buffer_object(brw->vb.upload.vbo[i])),
-			      brw_invalidate_vbo_cb,
-			      &brw->intel,
-			      GL_TRUE);
-   }

-   ctx->Driver.BufferData( ctx, 
-			   GL_ARRAY_BUFFER_ARB, 
-			   BRW_UPLOAD_INIT_SIZE,
-			   NULL,
-			   GL_DYNAMIC_DRAW_ARB,
-			   brw->vb.upload.vbo[0] );
+      ctx->Driver.BufferData(ctx,
+			     GL_ARRAY_BUFFER_ARB,
+			     BRW_UPLOAD_INIT_SIZE,
+			     NULL,
+			     GL_DYNAMIC_DRAW_ARB,
+			     brw->vb.upload.vbo[i]);
+
+      /* Set the internal VBOs to no-backing-store.  We only use them as a
+       * temporary within a brw_try_draw_prims while the lock is held.
+       */
+      if (!brw->intel.ttm) {
+	 struct intel_buffer_object *intel_bo =
+	    intel_buffer_object(brw->vb.upload.vbo[i]);
+
+	 dri_bo_fake_disable_backing_store(intel_bufferobj_buffer(intel_bo),
+					   NULL, NULL);
+      }
+   }
 }

 void brw_draw_destroy( struct brw_context *brw )
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@ -58,7 +58,7 @@ struct brw_array_state {
 	 GLuint dword;
      } vb0;
   
-      struct buffer *buffer;
+      dri_bo *buffer;
      GLuint offset;

      GLuint max_index;   
@ -68,7 +68,7 @@ struct brw_array_state {
 };


-static struct buffer *array_buffer( const struct gl_client_array *array )
+static dri_bo *array_buffer( const struct gl_client_array *array )
 {
   return intel_bufferobj_buffer(intel_buffer_object(array->BufferObj));
 }
@ -621,7 +621,7 @@ void brw_upload_indices( struct brw_context *brw,
    */
   {
      struct brw_indexbuffer ib;
-      struct buffer *buffer = intel_bufferobj_buffer(intel_buffer_object(bufferobj));
+      dri_bo *buffer = intel_bufferobj_buffer(intel_buffer_object(bufferobj));

      memset(&ib, 0, sizeof(ib));
   
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@ -123,21 +123,18 @@ const struct brw_tracked_state brw_drawing_rect = {
   .update = upload_drawing_rect
 };

-/***********************************************************************
- * Binding table pointers
+/**
+ * Upload the binding table pointers, which point to each stage's array of
+ * surface state pointers.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which is the BRW_SS_POOL cache buffer.
 */
-
 static void upload_binding_table_pointers(struct brw_context *brw)
 {
   struct brw_binding_table_pointers btp;
   memset(&btp, 0, sizeof(btp));

-   /* The binding table has been emitted to the SS pool already, so we
-    * know what its offset is.  When the batch buffer is fired, the
-    * binding table and surface structs will get fixed up to point to
-    * where the textures actually landed, but that won't change the
-    * value of the offsets here:
-    */
   btp.header.opcode = CMD_BINDING_TABLE_PTRS;
   btp.header.length = sizeof(btp)/4 - 2;
   btp.vs = 0;
@ -159,11 +156,12 @@ const struct brw_tracked_state brw_binding_table_pointers = {
 };


-/***********************************************************************
- * Pipelined state pointers.  This is the key state packet from which
- * the hardware chases pointers to all the uploaded state in VRAM.
+/**
+ * Upload pointers to the per-stage state.
+ *
+ * The state pointers in this packet are all relative to the general state
+ * base address set by CMD_STATE_BASE_ADDRESS, which is the BRW_GS_POOL buffer.
 */
-   
 static void upload_pipelined_state_pointers(struct brw_context *brw )
 {
   struct brw_pipelined_state_pointers psp;
@ -233,71 +231,53 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
   .update = upload_psp_urb_cbs
 };

-
-
-
-/***********************************************************************
- * Depthbuffer - currently constant, but rotation would change that.
+/**
+ * Upload the depthbuffer offset and format.
+ *
+ * We have to do this per state validation as we need to emit the relocation
+ * in the batch buffer.
 */
-
 static void upload_depthbuffer(struct brw_context *brw)
 {
-   /* 0x79050003  Depth Buffer */
   struct intel_context *intel = &brw->intel;
   struct intel_region *region = brw->state.depth_region;
-   struct brw_depthbuffer bd;
-   memset(&bd, 0, sizeof(bd));

-   bd.header.bits.opcode = CMD_DEPTH_BUFFER;
-   bd.header.bits.length = sizeof(bd)/4-2;
-   bd.dword1.bits.pitch = (region->pitch * region->cpp) - 1;
-   
+   unsigned int format;
+
   switch (region->cpp) {
   case 2:
-      bd.dword1.bits.format = BRW_DEPTHFORMAT_D16_UNORM;
+      format = BRW_DEPTHFORMAT_D16_UNORM;
      break;
   case 4:
      if (intel->depth_buffer_is_float)
-	 bd.dword1.bits.format = BRW_DEPTHFORMAT_D32_FLOAT;
+	 format = BRW_DEPTHFORMAT_D32_FLOAT;
      else
-	 bd.dword1.bits.format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+	 format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
      break;
   default:
      assert(0);
      return;
   }

-   bd.dword1.bits.depth_offset_disable = 0; /* coordinate offset */
-
-   /* The depthbuffer can only use YMAJOR tiling...  This is a bit of
-    * a shame as it clashes with the 2d blitter which only supports
-    * XMAJOR tiling...  
-    */
-   bd.dword1.bits.tile_walk = BRW_TILEWALK_YMAJOR;
-   bd.dword1.bits.tiled_surface = intel->depth_region->tiled;
-   bd.dword1.bits.surface_type = BRW_SURFACE_2D;
-
-   /* BRW_NEW_LOCK */
-   bd.dword2_base_addr = bmBufferOffset(intel, region->buffer);    
-
-   bd.dword3.bits.mipmap_layout = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
-   bd.dword3.bits.lod = 0;
-   bd.dword3.bits.width = region->pitch - 1; /* XXX: width ? */
-   bd.dword3.bits.height = region->height - 1;
-
-   bd.dword4.bits.min_array_element = 0;
-   bd.dword4.bits.depth = 0;
-      
-   BRW_CACHED_BATCH_STRUCT(brw, &bd);
+   BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS);
+   OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2));
+   OUT_BATCH(((region->pitch * region->cpp) - 1) |
+	     (format << 18) |
+	     (BRW_TILEWALK_YMAJOR << 26) |
+	     (region->tiled << 27) |
+	     (BRW_SURFACE_2D << 29));
+   OUT_RELOC(region->buffer,
+	     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0);
+   OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
+	     ((region->pitch - 1) << 6) |
+	     ((region->height - 1) << 19));
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
 }

 const struct brw_tracked_state brw_depthbuffer = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK,
-      .cache = 0
-   },
-   .update = upload_depthbuffer
+   .update = upload_depthbuffer,
+   .always_update = GL_TRUE,
 };


@ -494,40 +474,37 @@ const struct brw_tracked_state brw_invarient_state = {
   .update = upload_invarient_state
 };

-
-/* State pool addresses:
+/**
+ * Define the base addresses which some state is referenced from.
+ *
+ * This allows us to avoid having to emit relocations in many places for
+ * cached state, and instead emit pointers inside of large, mostly-static
+ * state pools.  This comes at the expense of memory, and more expensive cache
+ * misses.
 */
 static void upload_state_base_address( struct brw_context *brw )
 {
   struct intel_context *intel = &brw->intel;
-   struct brw_state_base_address sba;
-      
-   memset(&sba, 0, sizeof(sba));

-   sba.header.opcode = CMD_STATE_BASE_ADDRESS;
-   sba.header.length = 0x4;
-
-   /* BRW_NEW_LOCK */
-   sba.bits0.general_state_address = bmBufferOffset(intel, brw->pool[BRW_GS_POOL].buffer) >> 5;
-   sba.bits0.modify_enable = 1;
-
-   /* BRW_NEW_LOCK */
-   sba.bits1.surface_state_address = bmBufferOffset(intel, brw->pool[BRW_SS_POOL].buffer) >> 5;
-   sba.bits1.modify_enable = 1;
-
-   sba.bits2.modify_enable = 1;
-   sba.bits3.modify_enable = 1;
-   sba.bits4.modify_enable = 1;
-
-   BRW_CACHED_BATCH_STRUCT(brw, &sba);
+   /* Output the structure (brw_state_base_address) directly to the
+    * batchbuffer, so we can emit relocations inline.
+    */
+   BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
+   OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
+   OUT_RELOC(brw->pool[BRW_GS_POOL].buffer,
+	     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+	     1); /* General state base address */
+   OUT_RELOC(brw->pool[BRW_SS_POOL].buffer,
+	     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+	     1); /* Surface state base address */
+   OUT_BATCH(1); /* Indirect object base address */
+   OUT_BATCH(1); /* General state upper bound */
+   OUT_BATCH(1); /* Indirect object upper bound */
+   ADVANCE_BATCH();
 }


 const struct brw_tracked_state brw_state_base_address = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_CONTEXT | BRW_NEW_LOCK,
-      .cache = 0
-   },
+   .always_update = GL_TRUE,
   .update = upload_state_base_address
 };
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@ -107,6 +107,12 @@ GLboolean brw_search_cache( struct brw_cache *cache,
 void brw_init_caches( struct brw_context *brw );
 void brw_destroy_caches( struct brw_context *brw );

+/** Returns the buffer object of the pool that backs state cache \p id. */
+static inline dri_bo *
+brw_cache_buffer(struct brw_context *brw, enum brw_cache_id id)
+{
+   struct brw_cache *cache = &brw->cache[id];
+
+   return cache->pool->buffer;
+}
+
 /***********************************************************************
 * brw_state_batch.c
 */
--- a/src/mesa/drivers/dri/i965/brw_state_batch.c
+++ b/src/mesa/drivers/dri/i965/brw_state_batch.c
@ -95,8 +95,6 @@ static void clear_batch_cache( struct brw_context *brw )

   brw_clear_all_caches(brw);

-   bmReleaseBuffers(&brw->intel);
-   
   brw_invalidate_pools(brw);
 }

--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@ -187,12 +187,7 @@ GLuint brw_upload_cache( struct brw_cache *cache,

   /* Copy data to the buffer:
    */
-   bmBufferSubData(&cache->brw->intel,
-		   cache->pool->buffer,
-		   offset,
-		   data_size,
-		   data);
-   
+   dri_bo_subdata(cache->pool->buffer, offset, data_size, data);

   cache->brw->state.dirty.cache |= 1<<cache->id;
   cache->last_addr = offset;
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@ -0,0 +1,131 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "mtypes.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+/**
+ * Prints one dword of hardware state to stderr: a header (state name,
+ * hardware address of the dword, raw dword value) followed by a
+ * caller-formatted message.
+ *
+ * \param name Name of the state object
+ * \param data Pointer to the base of the state object
+ * \param hw_offset Hardware offset of the base of the state data.
+ * \param index Index of the DWORD being output.
+ * \param fmt printf-style format for the message printed after the header;
+ *            the remaining arguments are consumed by it.  No newline is
+ *            appended, so the caller supplies one in \p fmt.
+ */
+static void
+state_out(const char *name, const uint32_t *data, uint32_t hw_offset,
+	  int index, const char *fmt, ...)
+{
+   va_list va;
+
+   /* Header: name, address of this dword (4 bytes per index), raw value. */
+   fprintf(stderr, "%8s: 0x%08x: 0x%08x: ",
+	   name, hw_offset + index * 4, data[index]);
+   va_start(va, fmt);
+   vfprintf(stderr, fmt, va);
+   va_end(va);
+}
+
+/**
+ * Generic, undecoded state buffer debug printout.
+ *
+ * Prints every whole dword of a state object through state_out(), labelled
+ * only with its dword index.
+ *
+ * \param name Name of the state object
+ * \param buffer Buffer object holding the state.  buffer->virtual is
+ *               dereferenced and buffer->offset is used as the hardware base.
+ * \param pool_offset Byte offset of the state object within \p buffer
+ * \param state_size Size of the state object in bytes; a trailing partial
+ *                   dword is not printed.
+ */
+static void
+state_struct_out(char *name, dri_bo *buffer, unsigned int pool_offset,
+		 unsigned int state_size)
+{
+   const int num_dwords = state_size / 4;
+   /* Step by bytes through a char *: arithmetic on void * is not ISO C. */
+   uint32_t *state = (uint32_t *)((char *)buffer->virtual + pool_offset);
+   int i;
+
+   for (i = 0; i < num_dwords; i++) {
+      state_out(name, state, buffer->offset + pool_offset, i,
+		"dword %d\n", i);
+   }
+}
+
+/**
+ * Prints the first five dwords of each of the brw->wm.nr_surfaces WM surface
+ * states, decoding size, mip count, pitch, tiling and min LOD.
+ *
+ * \param brw Context supplying the surface count and the per-surface offsets
+ *            (brw->wm.bind.surf_ss_offset[])
+ * \param ss_buffer Buffer object the surface states are read from.
+ *                  ss_buffer->virtual is dereferenced.
+ */
+static void dump_wm_surface_state(struct brw_context *brw, dri_bo *ss_buffer)
+{
+   int i;
+
+   for (i = 0; i < brw->wm.nr_surfaces; i++) {
+      const unsigned int ss_offset = brw->wm.bind.surf_ss_offset[i];
+      unsigned int surfoff = ss_buffer->offset + ss_offset;
+      /* Step by bytes through a char *: arithmetic on void * is not ISO C. */
+      struct brw_surface_state *surf =
+	 (struct brw_surface_state *)((char *)ss_buffer->virtual + ss_offset);
+      uint32_t *surfvals = (uint32_t *)surf;
+      char name[20];
+
+      snprintf(name, sizeof(name), "WM SS%d", i);
+      state_out(name, surfvals, surfoff, 0, "\n");
+      state_out(name, surfvals, surfoff, 1, "offset\n");
+      /* Width, height and pitch are printed as the stored field plus one. */
+      state_out(name, surfvals, surfoff, 2, "%dx%d size, %d mips\n",
+		surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count);
+      state_out(name, surfvals, surfoff, 3, "pitch %d, %stiled\n",
+		surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not ");
+      state_out(name, surfvals, surfoff, 4, "mip base %d\n",
+		surf->ss4.min_lod);
+   }
+}
+
+/**
+ * Print additional debug information associated with the batchbuffer
+ * when DEBUG_BATCH is set.
+ *
+ * For 965, this means mapping the state buffers that would have been referenced
+ * by the batchbuffer and dumping them.
+ *
+ * The buffer offsets printed rely on the buffer containing the last offset
+ * it was validated at.
+ *
+ * \param intel Context whose surface-state and general-state pools are dumped
+ *              to stderr.
+ */
+void brw_debug_batch(struct intel_context *intel)
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   dri_bo *ss_buffer, *gs_buffer;
+
+   ss_buffer = brw->pool[BRW_SS_POOL].buffer;
+   gs_buffer = brw->pool[BRW_GS_POOL].buffer;
+
+   dri_bo_map(ss_buffer, GL_FALSE);
+   dri_bo_map(gs_buffer, GL_FALSE);
+
+   /* The binding table holds one 4-byte entry per surface. */
+   state_struct_out("WM bind", ss_buffer, brw->wm.bind_ss_offset,
+		    4 * brw->wm.nr_surfaces);
+   dump_wm_surface_state(brw, ss_buffer);
+
+   state_struct_out("VS", gs_buffer, brw->vs.state_gs_offset,
+		    sizeof(struct brw_vs_unit_state));
+   state_struct_out("SF", gs_buffer, brw->sf.state_gs_offset,
+		    sizeof(struct brw_sf_unit_state));
+   /* The viewport is a separate object from the SF unit state: dump it from
+    * its own offset and with its own size rather than repeating the SF dump.
+    */
+   state_struct_out("SF viewport", gs_buffer, brw->sf.vp_gs_offset,
+		    sizeof(struct brw_sf_viewport));
+   state_struct_out("WM", gs_buffer, brw->wm.state_gs_offset,
+		    sizeof(struct brw_wm_unit_state));
+
+   dri_bo_unmap(gs_buffer);
+   dri_bo_unmap(ss_buffer);
+}
--- a/src/mesa/drivers/dri/i965/brw_state_pool.c
+++ b/src/mesa/drivers/dri/i965/brw_state_pool.c
@ -34,7 +34,7 @@
 #include "imports.h"

 #include "intel_ioctl.h"
-#include "bufmgr.h"
+#include "dri_bufmgr.h"

 GLboolean brw_pool_alloc( struct brw_mem_pool *pool,
 			  GLuint size,
@ -64,28 +64,21 @@ void brw_invalidate_pool( struct intel_context *intel,
 {
   if (INTEL_DEBUG & DEBUG_STATE)
      _mesa_printf("\n\n\n %s \n\n\n", __FUNCTION__);
-   
-   bmBufferData(intel,
-		pool->buffer,
-		pool->size,
-		NULL,
-		0); 

   pool->offset = 0;

   brw_clear_all_caches(pool->brw);
 }

-static void brw_invalidate_pool_cb( struct intel_context *intel, void *ptr )
+static void
+brw_invalidate_pool_cb(dri_bo *bo, void *ptr)
 {
-   struct brw_mem_pool *pool = (struct brw_mem_pool *) ptr;
+   struct brw_mem_pool *pool = ptr;
+   struct brw_context *brw = pool->brw;

-   pool->offset = 0;
-   brw_clear_all_caches(pool->brw);
+   brw_invalidate_pool(&brw->intel, pool);
 }

-
-
 static void brw_init_pool( struct brw_context *brw,
 			   GLuint pool_id,
 			   GLuint size )
@ -94,30 +87,28 @@ static void brw_init_pool( struct brw_context *brw,

   pool->size = size;   
   pool->brw = brw;
-   
-   bmGenBuffers(&brw->intel, "pool", 1, &pool->buffer, 12);

-   /* Also want to say not to wait on fences when data is presented
+   pool->buffer = dri_bo_alloc(brw->intel.bufmgr,
+			       (pool_id == BRW_GS_POOL) ? "GS pool" : "SS pool",
+			       size, 4096, DRM_BO_FLAG_MEM_TT);
+
+   /* Disable the backing store for the state cache.  It's not worth the
+    * cost of keeping a backing store copy, since we can just regenerate
+    * the contents at approximately the same cost as the memcpy, and only
+    * if the contents are lost.
    */
-   bmBufferSetInvalidateCB(&brw->intel, pool->buffer, 
-			   brw_invalidate_pool_cb, 
-			   pool,
-			   GL_TRUE);   
-
-   bmBufferData(&brw->intel,
-		pool->buffer,
-		pool->size,
-		NULL,
-		0); 
-
+   if (!brw->intel.ttm) {
+      dri_bo_fake_disable_backing_store(pool->buffer, brw_invalidate_pool_cb,
+					pool);
+   }
 }

 static void brw_destroy_pool( struct brw_context *brw,
 			      GLuint pool_id )
 {
   struct brw_mem_pool *pool = &brw->pool[pool_id];
-   
-   bmDeleteBuffers(&brw->intel, 1, &pool->buffer);
+
+   dri_bo_unreference(pool->buffer);
 }


--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@ -33,7 +33,7 @@

 #include "brw_context.h"
 #include "brw_state.h"
-#include "bufmgr.h"
+#include "dri_bufmgr.h"
 #include "intel_batchbuffer.h"

 /* This is used to initialize brw->state.atoms[].  We could use this
@ -210,14 +210,6 @@ void brw_validate_state( struct brw_context *brw )
   if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
      brw_clear_batch_cache_flush(brw);

-
-   /* Make an early reference to the state pools, as we don't cope
-    * well with them being evicted from here down.
-    */
-   (void)bmBufferOffset(&brw->intel, brw->pool[BRW_GS_POOL].buffer);
-   (void)bmBufferOffset(&brw->intel, brw->pool[BRW_SS_POOL].buffer);
-   (void)bmBufferOffset(&brw->intel, brw->intel.batch->buffer);
-
   if (INTEL_DEBUG) {
      /* Debug version which enforces various sanity checks on the
       * state flags which are generated and checked to help ensure
@ -233,14 +225,17 @@ void brw_validate_state( struct brw_context *brw )

 	 assert(atom->dirty.mesa ||
 		atom->dirty.brw ||
-		atom->dirty.cache);
+		atom->dirty.cache ||
+		atom->always_update);
 	 assert(atom->update);

-	 if (check_state(state, &atom->dirty)) {
-	    brw->state.atoms[i]->update( brw );
+	 if (check_state(state, &atom->dirty) || atom->always_update) {
+	    atom->update( brw );
 	    
 /* 	    emit_foo(brw); */
 	 }
+	 if (atom->emit_reloc != NULL)
+	    atom->emit_reloc(brw);

 	 accumulate_state(&examined, &atom->dirty);

@ -255,8 +250,12 @@ void brw_validate_state( struct brw_context *brw )
   }
   else {
      for (i = 0; i < Elements(atoms); i++) {	 
-	 if (check_state(state, &brw->state.atoms[i]->dirty))
-	    brw->state.atoms[i]->update( brw );
+	 const struct brw_tracked_state *atom = brw->state.atoms[i];
+
+	 if (check_state(state, &atom->dirty) || atom->always_update)
+	    atom->update( brw );
+	 if (atom->emit_reloc != NULL)
+	    atom->emit_reloc(brw);
      }
   }

--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@ -35,6 +35,7 @@

 #include "intel_mipmap_tree.h"
 #include "intel_tex_layout.h"
+#include "intel_context.h"
 #include "macros.h"

 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@ -49,7 +49,7 @@
 #include "brw_state.h"
 #include "brw_fallback.h"
 #include "brw_vs.h"
-
+#include <stdarg.h>


 /* called from intelDestroyContext()
@ -97,7 +97,7 @@ static void brw_lost_hardware( struct intel_context *intel )

   /* Which means there shouldn't be any commands already queued:
    */
-   assert(intel->batch->ptr == intel->batch->map + intel->batch->offset);
+   assert(intel->batch->ptr == intel->batch->map);

   brw->state.dirty.mesa |= ~0;
   brw->state.dirty.brw |= ~0;
@ -153,9 +153,6 @@ static GLuint brw_flush_cmd( void )
   return *(GLuint *)&flush;
 }

-
-
-
 static void brw_invalidate_state( struct intel_context *intel, GLuint new_state )
 {
   /* nothing */
@ -178,5 +175,6 @@ void brwInitVtbl( struct brw_context *brw )
   brw->intel.vtbl.set_draw_region = brw_set_draw_region;
   brw->intel.vtbl.flush_cmd = brw_flush_cmd;
   brw->intel.vtbl.emit_flush = brw_emit_flush;
+   brw->intel.vtbl.debug_batch = brw_debug_batch;
 }

--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@ -34,25 +34,19 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "dri_bufmgr.h"
 #include "brw_wm.h"
-#include "bufmgr.h"

 /***********************************************************************
 * WM unit - fragment programs and rasterization
 */

-static void invalidate_scratch_cb( struct intel_context *intel,
-				   void *unused )
-{
-   /* nothing */
-}
-
-
 static void upload_wm_unit(struct brw_context *brw )
 {
   struct intel_context *intel = &brw->intel;
   struct brw_wm_unit_state wm;
   GLuint max_threads;
+   GLuint per_thread;

   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
      max_threads = 0; 
@ -71,41 +65,37 @@ static void upload_wm_unit(struct brw_context *brw )

   wm.wm5.max_threads = max_threads;      

+   per_thread = ALIGN(brw->wm.prog_data->total_scratch, 1024);
+   assert(per_thread <= 12 * 1024);
+
   if (brw->wm.prog_data->total_scratch) {
-      GLuint per_thread = ALIGN(brw->wm.prog_data->total_scratch, 1024);
      GLuint total = per_thread * (max_threads + 1);

      /* Scratch space -- just have to make sure there is sufficient
       * allocated for the active program and current number of threads.
-       */      
-
-      if (!brw->wm.scratch_buffer) {
-	 bmGenBuffers(intel, "wm scratch", 1, &brw->wm.scratch_buffer, 12);
-	 bmBufferSetInvalidateCB(intel,
-				 brw->wm.scratch_buffer,
-				 invalidate_scratch_cb,
-				 NULL,
-				 GL_FALSE);
-      }
-
-      if (total > brw->wm.scratch_buffer_size) {
-	 brw->wm.scratch_buffer_size = total;
-	 bmBufferData(intel,
-		      brw->wm.scratch_buffer,
-		      brw->wm.scratch_buffer_size,
-		      NULL,
-		      0);
-      }
-		   
-      assert(per_thread <= 12 * 1024);
-      wm.thread2.per_thread_scratch_space = (per_thread / 1024) - 1;
-
-      /* XXX: could make this dynamic as this is so rarely active:
       */
-      /* BRW_NEW_LOCK */
-      wm.thread2.scratch_space_base_pointer = 
-	 bmBufferOffset(intel, brw->wm.scratch_buffer) >> 10;
+      brw->wm.scratch_buffer_size = total;
+      if (brw->wm.scratch_buffer &&
+	  brw->wm.scratch_buffer_size > brw->wm.scratch_buffer->size) {
+	 dri_bo_unreference(brw->wm.scratch_buffer);
+	 brw->wm.scratch_buffer = NULL;
+      }
+      if (!brw->wm.scratch_buffer) {
+	 brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr,
+					       "wm scratch",
+					       brw->wm.scratch_buffer_size,
+					       4096, DRM_BO_FLAG_MEM_TT);
+      }
   }
+   /* XXX: Scratch buffers are not implemented correctly.
+    *
+    * The scratch offset to be programmed into wm is relative to the general
+    * state base address.  However, using dri_bo_alloc/dri_bo_emit_reloc (or
+    * the previous bmGenBuffers scheme), we get an offset relative to the
+    * start of framebuffer.  Even before then, it was broken in other ways,
+    * so just fail for now if we hit that path.
+    */
+   assert(brw->wm.prog_data->total_scratch == 0);

   /* CACHE_NEW_SURFACE */
   wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces;
@ -177,6 +167,19 @@ static void upload_wm_unit(struct brw_context *brw )
      wm.wm4.stats_enable = 1;

   brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm );
+
+   if (brw->wm.prog_data->total_scratch) {
+      /*
+      dri_emit_reloc(brw->cache[BRW_WM_UNIT].pool->buffer,
+		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
+		     (per_thread / 1024) - 1,
+		     brw->wm.state_gs_offset +
+		     ((char *)&wm.thread2 - (char *)&wm),
+		     brw->wm.scratch_buffer);
+      */
+   } else {
+      wm.thread2.scratch_space_base_pointer = 0;
+   }
 }

 const struct brw_tracked_state brw_wm_unit = {
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@ -140,49 +140,49 @@ static GLuint translate_tex_format( GLuint mesa_format )
 }

 static
-void brw_update_texture_surface( GLcontext *ctx, 
-				 GLuint unit,
-				 struct brw_surface_state *surf )
+void brw_update_texture_surface( GLcontext *ctx, GLuint unit )
 {
-   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
+   struct brw_surface_state surf;

-   memset(surf, 0, sizeof(*surf));
+   memset(&surf, 0, sizeof(surf));

-   surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;   
-   surf->ss0.surface_type = translate_tex_target(tObj->Target);
-   surf->ss0.surface_format = translate_tex_format(firstImage->TexFormat->MesaFormat);
+   surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   surf.ss0.surface_type = translate_tex_target(tObj->Target);
+   surf.ss0.surface_format = translate_tex_format(firstImage->TexFormat->MesaFormat);

   /* This is ok for all textures with channel width 8bit or less:
    */
-/*    surf->ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+/*    surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */

-   /* BRW_NEW_LOCK */
-   surf->ss1.base_addr = bmBufferOffset(intel,
-					intelObj->mt->region->buffer);
+   /* Updated in emit_reloc */
+   surf.ss1.base_addr = intelObj->mt->region->buffer->offset;

-   surf->ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel;
-   surf->ss2.width = firstImage->Width - 1;
-   surf->ss2.height = firstImage->Height - 1;
+   surf.ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel;
+   surf.ss2.width = firstImage->Width - 1;
+   surf.ss2.height = firstImage->Height - 1;

-   surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR;
-   surf->ss3.tiled_surface = intelObj->mt->region->tiled; /* always zero */
-   surf->ss3.pitch = (intelObj->mt->pitch * intelObj->mt->cpp) - 1;
-   surf->ss3.depth = firstImage->Depth - 1;
+   surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+   surf.ss3.tiled_surface = intelObj->mt->region->tiled; /* always zero */
+   surf.ss3.pitch = (intelObj->mt->pitch * intelObj->mt->cpp) - 1;
+   surf.ss3.depth = firstImage->Depth - 1;

-   surf->ss4.min_lod = 0;
+   surf.ss4.min_lod = 0;
 
   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
-      surf->ss0.cube_pos_x = 1;
-      surf->ss0.cube_pos_y = 1;
-      surf->ss0.cube_pos_z = 1;
-      surf->ss0.cube_neg_x = 1;
-      surf->ss0.cube_neg_y = 1;
-      surf->ss0.cube_neg_z = 1;
+      surf.ss0.cube_pos_x = 1;
+      surf.ss0.cube_pos_y = 1;
+      surf.ss0.cube_pos_z = 1;
+      surf.ss0.cube_neg_x = 1;
+      surf.ss0.cube_neg_y = 1;
+      surf.ss0.cube_neg_z = 1;
   }
+
+   brw->wm.bind.surf_ss_offset[unit + 1] =
+      brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf );
 }


@ -194,11 +194,8 @@ static void upload_wm_surfaces(struct brw_context *brw )
 {
   GLcontext *ctx = &brw->intel.ctx;
   struct intel_context *intel = &brw->intel;
-   struct brw_surface_binding_table bind;
   GLuint i;

-   memcpy(&bind, &brw->wm.bind, sizeof(bind));
-      
   {
      struct brw_surface_state surf;
      struct intel_region *region = brw->state.draw_region;
@ -222,9 +219,8 @@ static void upload_wm_surfaces(struct brw_context *brw )
      surf.ss0.writedisable_blue =  !brw->attribs.Color->ColorMask[2];
      surf.ss0.writedisable_alpha = !brw->attribs.Color->ColorMask[3];

-      /* BRW_NEW_LOCK */
-      surf.ss1.base_addr = bmBufferOffset(&brw->intel, region->buffer);
-
+      /* Updated in emit_reloc */
+      surf.ss1.base_addr = region->buffer->offset;

      surf.ss2.width = region->pitch - 1; /* XXX: not really! */
      surf.ss2.height = region->height - 1;
@ -233,6 +229,7 @@ static void upload_wm_surfaces(struct brw_context *brw )
      surf.ss3.pitch = (region->pitch * region->cpp) - 1;

      brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf );
+
      brw->wm.nr_surfaces = 1;
   }

@ -243,13 +240,9 @@ static void upload_wm_surfaces(struct brw_context *brw )
      /* _NEW_TEXTURE, BRW_NEW_TEXDATA 
       */
      if (texUnit->_ReallyEnabled &&
-	  intel_finalize_mipmap_tree(intel,texUnit->_Current)) {
-
-	 struct brw_surface_state surf;
-
-	 brw_update_texture_surface(ctx, i, &surf);
-
-	 brw->wm.bind.surf_ss_offset[i+1] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf );
+	  intel_finalize_mipmap_tree(intel,texUnit->_Current))
+      {
+	 brw_update_texture_surface(ctx, i);
 	 brw->wm.nr_surfaces = i+2;
      }
      else if( texUnit->_ReallyEnabled &&
@ -267,14 +260,43 @@ static void upload_wm_surfaces(struct brw_context *brw )
 					    &brw->wm.bind );
 }

+/**
+ * Emits relocations for the ss1 (base address) dword of the surface states
+ * recorded in brw->wm.bind.surf_ss_offset[]: entry 0 against the draw
+ * region's buffer, and entry unit + 1 against the texture buffer of every
+ * really-enabled texture unit that has a mipmap tree.
+ */
+static void emit_reloc_wm_surfaces(struct brw_context *brw)
+{
+   const size_t ss1_offset = offsetof(struct brw_surface_state, ss1);
+   int i;
+
+   /* Emit framebuffer relocation (read and write access) */
+   dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE),
+		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
+		  0,
+		  brw->wm.bind.surf_ss_offset[0] + ss1_offset,
+		  brw->state.draw_region->buffer);
+
+   /* Emit relocations for texture buffers (read access only) */
+   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+      struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[i];
+      struct intel_texture_object *intelObj;
+
+      if (!texUnit->_ReallyEnabled)
+	 continue;
+
+      intelObj = intel_texture_object(texUnit->_Current);
+      if (intelObj->mt == NULL)
+	 continue;
+
+      dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE),
+		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		     0,
+		     brw->wm.bind.surf_ss_offset[i + 1] + ss1_offset,
+		     intelObj->mt->region->buffer);
+   }
+}
+
 const struct brw_tracked_state brw_wm_surfaces = {
   .dirty = {
      .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS,
-      .brw = (BRW_NEW_CONTEXT | 
-	      BRW_NEW_LOCK),	/* required for bmBufferOffset */
+      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
-   .update = upload_wm_surfaces
+   .update = upload_wm_surfaces,
+   .emit_reloc = emit_reloc_wm_surfaces,
 };


--- a/src/mesa/drivers/dri/i965/bufmgr.h
+++ b/src/mesa/drivers/dri/i965/bufmgr.h
@ -1,186 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef BUFMGR_H
-#define BUFMGR_H
-
-#include "intel_context.h"
-
-
-/* The buffer manager context.  Opaque.
- */
-struct bufmgr;
-struct buffer;
-
-
-struct bufmgr *bm_fake_intel_Attach( struct intel_context *intel ); 
-
-/* Flags for validate and other calls.  If both NO_UPLOAD and NO_EVICT
- * are specified, ValidateBuffers is essentially a query.
- */
-#define BM_MEM_LOCAL   0x1
-#define BM_MEM_AGP     0x2
-#define BM_MEM_VRAM    0x4	/* not yet used */
-#define BM_WRITE       0x8	/* not yet used */
-#define BM_READ        0x10	/* not yet used */
-#define BM_NO_UPLOAD   0x20
-#define BM_NO_EVICT    0x40
-#define BM_NO_MOVE     0x80	/* not yet used */
-#define BM_NO_ALLOC    0x100	/* legacy "fixed" buffers only */
-#define BM_CLIENT      0x200	/* for map - pointer will be accessed
-				 * without dri lock */
-
-#define BM_MEM_MASK (BM_MEM_LOCAL|BM_MEM_AGP|BM_MEM_VRAM)
-
-
-
-
-/* Create a pool of a given memory type, from a certain offset and a
- * certain size.  
- *
- * Also passed in is a virtual pointer to the start of the pool.  This
- * is useful in the faked-out version in i915 so that MapBuffer can
- * return a pointer to a buffer residing in AGP space.  
- *
- * Flags passed into a pool are inherited by all buffers allocated in
- * that pool.  So pools representing the static front,back,depth
- * buffer allocations should have MEM_AGP|NO_UPLOAD|NO_EVICT|NO_MOVE to match
- * the behaviour of the legacy allocations.
- *
- * Returns -1 for failure, pool number for success.
- */
-int bmInitPool( struct intel_context *, 
-		unsigned long low_offset,
-		void *low_virtual,
-		unsigned long size,
-		unsigned flags);
-
-
-/* Stick closely to ARB_vbo semantics - they're well defined and
- * understood, and drivers can just pass the calls through without too
- * much thunking.
- */
-void bmGenBuffers(struct intel_context *, const char *, unsigned n, struct buffer **buffers,
-		  int align );
-void bmDeleteBuffers(struct intel_context *, unsigned n, struct buffer **buffers);
-
-
-/* Hook to inform faked buffer manager about fixed-position
- * front,depth,back buffers.  These may move to a fully memory-managed
- * scheme, or they may continue to be managed as is.
- */
-struct buffer *bmGenBufferStatic(struct intel_context *,
-				 unsigned pool);
-
-/* On evict, buffer manager will call invalidate_cb() to note that the
- * buffer needs to be reloaded.
- *
- * Buffer is uploaded by calling bmMapBuffer() and copying data into
- * the returned pointer.
- *
- * This is basically a big hack to get some more performance by
- * turning off backing store for buffers where we either have it
- * already (textures) or don't need it (batch buffers, temporary
- * vbo's).
- */
-void bmBufferSetInvalidateCB(struct intel_context *,
-			     struct buffer *buf,
-			     void (*invalidate_cb)( struct intel_context *, void *ptr ),
-			     void *ptr,
-			     GLboolean dont_fence_subdata);
-
-
-/* The driver has more intimate knowledge of the hardware than a GL
- * client would, so flags here is more prescriptive than the usage
- * values in the ARB_vbo interface:
- */
-int bmBufferData(struct intel_context *, 
-		  struct buffer *buf, 
-		  unsigned size, 
-		  const void *data, 
-		  unsigned flags );
-
-int bmBufferSubData(struct intel_context *, 
-		     struct buffer *buf, 
-		     unsigned offset, 
-		     unsigned size, 
-		     const void *data );
-
-/* In this version, taking the offset will provoke an upload on
- * buffers not already resident in AGP:
- */
-unsigned bmBufferOffset(struct intel_context *, 
-			struct buffer *buf);
-
-
-/* Extract data from the buffer:
- */
-void bmBufferGetSubData(struct intel_context *, 
-			struct buffer *buf, 
-			unsigned offset, 
-			unsigned size, 
-			void *data );
-
-void *bmMapBuffer( struct intel_context *,
-		   struct buffer *buf, 
-		   unsigned access );
-
-void bmUnmapBuffer( struct intel_context *,
-		    struct buffer *buf );
-
-/* Pertains to all buffers whose offset has been taken since the last
- * fence or release.
- */
-int bmValidateBuffers( struct intel_context * );
-void bmReleaseBuffers( struct intel_context * );
-
-GLuint bmCtxId( struct intel_context *intel );
-
-
-GLboolean bmError( struct intel_context * );
-void bmEvictAll( struct intel_context * );
-
-void *bmFindVirtual( struct intel_context *intel,
-		     unsigned int offset,
-		     size_t sz );
-
-/* This functionality is used by the buffer manager, not really sure
- * if we need to be exposing it in this way, probably libdrm will
- * offer equivalent calls.
- *
- * For now they can stay, but will likely change/move before final:
- */
-unsigned bmSetFence( struct intel_context * );
-unsigned bmSetFenceLock( struct intel_context * );
-unsigned bmLockAndFence( struct intel_context *intel );
-int bmTestFence( struct intel_context *, unsigned fence );
-void bmFinishFence( struct intel_context *, unsigned fence );
-void bmFinishFenceLock( struct intel_context *, unsigned fence );
-
-void bm_fake_NotifyContendedLockTake( struct intel_context * );
-
-#endif
--- a/src/mesa/drivers/dri/i965/bufmgr_fake.c
+++ b/src/mesa/drivers/dri/i965/bufmgr_fake.c
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@ -1,236 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "imports.h"
-#include "intel_batchbuffer.h"
-#include "intel_ioctl.h"
-#include "intel_decode.h"
-#include "bufmgr.h"
-
-#define FILE_DEBUG_FLAG DEBUG_BATCH
-
-static void intel_batchbuffer_reset( struct intel_batchbuffer *batch )
-{
-   assert(batch->map == NULL);
-
-   batch->offset = (unsigned long)batch->ptr;
-   batch->offset = ALIGN(batch->offset, 64);
-   batch->ptr = (unsigned char *) batch->offset;
-
-   if (BATCH_SZ - batch->offset < BATCH_REFILL) {
-      bmBufferData(batch->intel, 
-		   batch->buffer,
-		   BATCH_SZ, 
-		   NULL, 
-		   0); 
-      batch->offset = 0;
-      batch->ptr = NULL;
-   }
-		
-   batch->flags = 0;
-}
-
-static void intel_batchbuffer_reset_cb( struct intel_context *intel,
-					void *ptr )
-{
-   struct intel_batchbuffer *batch = (struct intel_batchbuffer *)ptr;
-   assert(batch->map == NULL);
-   batch->flags = 0;
-   batch->offset = 0;
-   batch->ptr = NULL;
-}
-
-GLubyte *intel_batchbuffer_map( struct intel_batchbuffer *batch )
-{
-   if (!batch->map) {
-      batch->map = bmMapBuffer(batch->intel, batch->buffer, 
-			       BM_MEM_AGP|BM_MEM_LOCAL|BM_CLIENT|BM_WRITE);
-      batch->ptr += (unsigned long)batch->map;
-   }
-
-   return batch->map;
-}
-
-void intel_batchbuffer_unmap( struct intel_batchbuffer *batch )
-{
-   if (batch->map) {
-      batch->ptr -= (unsigned long)batch->map;
-      batch->map = NULL;
-      bmUnmapBuffer(batch->intel, batch->buffer);
-   }
-}
-
-
-
-/*======================================================================
- * Public functions
- */
-struct intel_batchbuffer *intel_batchbuffer_alloc( struct intel_context *intel )
-{
-   struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1);
-
-   batch->intel = intel;
-
-   bmGenBuffers(intel, "batch", 1, &batch->buffer, 12);
-
-   bmBufferSetInvalidateCB(intel, batch->buffer,
-			   intel_batchbuffer_reset_cb,
-			   batch,
-			   GL_TRUE);
-
-   bmBufferData(batch->intel,
-		batch->buffer,
-		BATCH_SZ,
-		NULL,
-		0);
-
-
-   return batch;
-}
-
-void intel_batchbuffer_free( struct intel_batchbuffer *batch )
-{
-   if (batch->map) 
-      bmUnmapBuffer(batch->intel, batch->buffer);
-   
-   bmDeleteBuffers(batch->intel, 1, &batch->buffer);
-   free(batch);
-}
-
-
-#define MI_BATCH_BUFFER_END 	(0xA<<23)
-
-
-GLboolean intel_batchbuffer_flush( struct intel_batchbuffer *batch )
-{
-   struct intel_context *intel = batch->intel;
-   GLuint used = batch->ptr - (batch->map + batch->offset);
-   GLuint offset;
-   GLint retval = GL_TRUE;
-
-   assert(intel->locked);
-
-   if (used == 0) {
-      bmReleaseBuffers( batch->intel );
-      return GL_TRUE;
-   }
-
-   /* Add the MI_BATCH_BUFFER_END.  Always add an MI_FLUSH - this is a
-    * performance drain that we would like to avoid.
-    */
-   if (used & 4) {
-      ((int *)batch->ptr)[0] = MI_BATCH_BUFFER_END;
-      batch->ptr += 4;
-      used += 4;
-   }
-   else {
-      ((int *)batch->ptr)[0] = 0;
-      ((int *)batch->ptr)[1] = MI_BATCH_BUFFER_END;
-
-      batch->ptr += 8;
-      used += 8;
-   }
-
-   intel_batchbuffer_unmap(batch);
-
-   /* Get the batch buffer offset: Must call bmBufferOffset() before
-    * bmValidateBuffers(), otherwise the buffer won't be on the inuse
-    * list.
-    */
-   offset = bmBufferOffset(batch->intel, batch->buffer);
-
-   if (bmValidateBuffers( batch->intel ) != 0) {
-      assert(intel->locked);
-      bmReleaseBuffers( batch->intel );
-      retval = GL_FALSE;
-      goto out;
-   }
-
-   if (INTEL_DEBUG & DEBUG_BATCH) {
-      char *map;
-
-      map = bmMapBuffer(batch->intel, batch->buffer,
-			BM_MEM_AGP|BM_MEM_LOCAL|BM_CLIENT);
-      intel_decode((uint32_t *)(map + batch->offset), used / 4,
-		   offset + batch->offset, intel->intelScreen->deviceID);
-      bmUnmapBuffer(batch->intel, batch->buffer);
-   }
-
-   /* Fire the batch buffer, which was uploaded above:
-    */
-   intel_batch_ioctl(batch->intel, 
-		     offset + batch->offset,
-		     used);
-
-   /* Reset the buffer:
-    */
- out:
-   intel_batchbuffer_reset( batch );
-   intel_batchbuffer_map( batch );
-
-   if (!retval)
-      DBG("%s failed\n", __FUNCTION__);
-
-   return retval;
-}
-
-
-
-
-
-
-
-void intel_batchbuffer_align( struct intel_batchbuffer *batch,
-			      GLuint align,
-			      GLuint sz )
-{
-   unsigned long ptr = (unsigned long) batch->ptr;
-   unsigned long aptr = ALIGN(ptr, align);
-   GLuint fixup = aptr - ptr;
-
-   if (intel_batchbuffer_space(batch) < fixup + sz)
-      intel_batchbuffer_flush(batch);
-   else {
-      memset(batch->ptr, 0, fixup);      
-      batch->ptr += fixup;
-   }
-}
-
-
-
-
-void intel_batchbuffer_data(struct intel_batchbuffer *batch,
-			    const void *data,
-			    GLuint bytes,
-			    GLuint flags)
-{
-   assert((bytes & 3) == 0);
-   intel_batchbuffer_require_space(batch, bytes, flags);
-   __memcpy(batch->ptr, data, bytes);
-   batch->ptr += bytes;
-}
-
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@ -0,0 +1 @@
+../intel/intel_batchbuffer.c
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@ -1,133 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef INTEL_BATCHBUFFER_H
-#define INTEL_BATCHBUFFER_H
-
-#include "mtypes.h"
-#include "bufmgr.h"
-
-struct intel_context;
-
-#define BATCH_SZ (16 * 1024)
-#define BATCH_REFILL 4096
-#define BATCH_RESERVED 16
-
-#define INTEL_BATCH_NO_CLIPRECTS 0x1
-#define INTEL_BATCH_CLIPRECTS    0x2
-
-struct intel_batchbuffer {
-   struct intel_context *intel;
-
-   struct buffer *buffer;
-
-   GLuint flags;
-   unsigned long offset;
-
-   GLubyte *map;
-   GLubyte *ptr; 
-};
-
-struct intel_batchbuffer *intel_batchbuffer_alloc( struct intel_context *intel );
-
-void intel_batchbuffer_free( struct intel_batchbuffer *batch );
-
-
-GLboolean intel_batchbuffer_flush( struct intel_batchbuffer *batch );
-
-void intel_batchbuffer_unmap( struct intel_batchbuffer *batch );
-GLubyte *intel_batchbuffer_map( struct intel_batchbuffer *batch );
-
-
-/* Unlike bmBufferData, this currently requires the buffer be mapped.
- * Consider it a convenience function wrapping multiple
- * intel_buffer_dword() calls.
- */
-void intel_batchbuffer_data(struct intel_batchbuffer *batch,
-			    const void *data,
-			    GLuint bytes,
-			    GLuint flags);
-
-void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
-				   GLuint bytes);
-
-
-/* Inline functions - might actually be better off with these
- * non-inlined.  Certainly better off switching all command packets to
- * be passed as structs rather than dwords, but that's a little bit of
- * work...
- */
-static inline GLuint 
-intel_batchbuffer_space( struct intel_batchbuffer *batch )
-{
-   return (BATCH_SZ - BATCH_RESERVED) - (batch->ptr - (batch->map + batch->offset));
-}
-
-
-static inline void 
-intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch,
-			     GLuint dword)
-{
-   assert(batch->map);
-   assert(intel_batchbuffer_space(batch) >= 4);
-   *(GLuint *)(batch->ptr) = dword;
-   batch->ptr += 4;
-}
-
-static inline void 
-intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
-				GLuint sz,
-				GLuint flags)
-{
-   assert(sz < BATCH_SZ - 8);
-   if (intel_batchbuffer_space(batch) < sz ||
-       (batch->flags != 0 && flags != 0 && batch->flags != flags))
-      intel_batchbuffer_flush(batch);
-   
-   batch->flags |= flags;
-}
-
-void intel_batchbuffer_align( struct intel_batchbuffer *batch,
-			      GLuint align,
-			      GLuint sz );
-
-
-/* Here are the crusty old macros, to be removed:
- */
-#define BATCH_LOCALS 
-#define BEGIN_BATCH(n, flags) intel_batchbuffer_require_space(intel->batch, n*4, flags)
-#define OUT_BATCH(d)  intel_batchbuffer_emit_dword(intel->batch, d)
-
-#define OUT_RELOC(buf, flags, delta) do { 				\
-   assert((delta) >= 0);						\
-   OUT_BATCH(bmBufferOffset(intel, buf) + delta);				\
-} while (0)
-
-#define ADVANCE_BATCH() do { } while(0)
-
-
-#endif
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@ -41,7 +41,7 @@
 #include "intel_regions.h"
 #include "intel_structs.h"

-#include "bufmgr.h"
+#include "dri_bufmgr.h"

 #define FILE_DEBUG_FLAG DEBUG_BLIT

@ -64,8 +64,13 @@ void intelCopyBuffer( __DRIdrawablePrivate *dPriv,
   intel = (struct intel_context *) dPriv->driContextPriv->driverPrivate;
   intelFlush( &intel->ctx );

-
-   bmFinishFenceLock(intel, intel->last_swap_fence);
+   if (intel->last_swap_fence) {
+      dri_fence_wait(intel->last_swap_fence);
+      dri_fence_unreference(intel->last_swap_fence);
+      intel->last_swap_fence = NULL;
+   }
+   intel->last_swap_fence = intel->first_swap_fence;
+   intel->first_swap_fence = NULL;

   /* The LOCK_HARDWARE is required for the cliprects.  Buffer offsets
    * should work regardless.
@ -151,9 +156,12 @@ void intelCopyBuffer( __DRIdrawablePrivate *dPriv,
      }
   }

-   intel_batchbuffer_flush( intel->batch );
-   intel->second_last_swap_fence = intel->last_swap_fence;
-   intel->last_swap_fence = bmSetFenceLock( intel );
+   if (intel->first_swap_fence)
+      dri_fence_unreference(intel->first_swap_fence);
+   intel_batchbuffer_flush(intel->batch);
+   intel->first_swap_fence = intel->batch->last_fence;
+   if (intel->first_swap_fence != NULL)
+      dri_fence_reference(intel->first_swap_fence);
   UNLOCK_HARDWARE( intel );

   if (!rect)
@ -176,7 +184,7 @@ void intelCopyBuffer( __DRIdrawablePrivate *dPriv,
 void intelEmitFillBlit( struct intel_context *intel,
 			GLuint cpp,
 			GLshort dst_pitch,
-			struct buffer *dst_buffer,
+			dri_bo *dst_buffer,
 			GLuint dst_offset,
 			GLboolean dst_tiled,
 			GLshort x, GLshort y, 
@ -247,11 +255,11 @@ static GLuint translate_raster_op(GLenum logicop)
 void intelEmitCopyBlit( struct intel_context *intel,
 			GLuint cpp,
 			GLshort src_pitch,
-			struct buffer *src_buffer,
+			dri_bo *src_buffer,
 			GLuint  src_offset,
 			GLboolean src_tiled,
 			GLshort dst_pitch,
-			struct buffer *dst_buffer,
+			dri_bo *dst_buffer,
 			GLuint  dst_offset,
 			GLboolean dst_tiled,
 			GLshort src_x, GLshort src_y,
@ -524,7 +532,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
 				  GLubyte *src_bits, GLuint src_size,
 				  GLuint fg_color,
 				  GLshort dst_pitch,
-				  struct buffer *dst_buffer,
+				  dri_bo *dst_buffer,
 				  GLuint dst_offset,
 				  GLboolean dst_tiled,
 				  GLshort x, GLshort y, 
--- a/src/mesa/drivers/dri/i965/intel_blit.h
+++ b/src/mesa/drivers/dri/i965/intel_blit.h
@ -31,8 +31,6 @@
 #include "intel_context.h"
 #include "intel_ioctl.h"

-struct buffer;
-
 extern void intelCopyBuffer( __DRIdrawablePrivate *dpriv,
 			     const drm_clip_rect_t *rect );
 extern void intelClearWithBlit(GLcontext *ctx, GLbitfield mask);
@ -40,11 +38,11 @@ extern void intelClearWithBlit(GLcontext *ctx, GLbitfield mask);
 extern void intelEmitCopyBlit( struct intel_context *intel,
 			       GLuint cpp,
 			       GLshort src_pitch,
-			       struct buffer *src_buffer,
+			       dri_bo *src_buffer,
 			       GLuint  src_offset,
 			       GLboolean src_tiled,
 			       GLshort dst_pitch,
-			       struct buffer *dst_buffer,
+			       dri_bo *dst_buffer,
 			       GLuint  dst_offset,
 			       GLboolean dst_tiled,
 			       GLshort srcx, GLshort srcy,
@ -55,7 +53,7 @@ extern void intelEmitCopyBlit( struct intel_context *intel,
 extern void intelEmitFillBlit( struct intel_context *intel,
 			       GLuint cpp,
 			       GLshort dst_pitch,
-			       struct buffer *dst_buffer,
+			       dri_bo *dst_buffer,
 			       GLuint dst_offset,
 			       GLboolean dst_tiled,
 			       GLshort x, GLshort y, 
@ -68,7 +66,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
 				  GLubyte *src_bits, GLuint src_size,
 				  GLuint fg_color,
 				  GLshort dst_pitch,
-				  struct buffer *dst_buffer,
+				  dri_bo *dst_buffer,
 				  GLuint dst_offset,
 				  GLboolean dst_tiled,
 				  GLshort dst_x, GLshort dst_y, 
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@ -32,8 +32,17 @@

 #include "intel_context.h"
 #include "intel_buffer_objects.h"
-#include "bufmgr.h"
+#include "dri_bufmgr.h"

+/**
+ * Allocates a new dri_bo to hold the buffer object's data.
+ *
+ * The bo is requested from intel->bufmgr with a size of
+ * intel_obj->Base.Size and stored in intel_obj->buffer.  Whatever
+ * intel_obj->buffer pointed to before is simply overwritten; it is not
+ * released here.
+ */
+static void
+intel_bufferobj_alloc_buffer(struct intel_context *intel,
+                             struct intel_buffer_object *intel_obj)
+{
+   dri_bo *bo;
+
+   bo = dri_bo_alloc(intel->bufmgr, "bufferobj",
+                     intel_obj->Base.Size, 64,
+                     DRM_BO_FLAG_MEM_TT);
+   intel_obj->buffer = bo;
+}

 /**
 * There is some duplication between mesa's bufferobjects and our
@ -45,16 +54,10 @@ static struct gl_buffer_object *intel_bufferobj_alloc( GLcontext *ctx,
 						       GLuint name, 
 						       GLenum target )
 {
-   struct intel_context *intel = intel_context(ctx);
-   struct intel_buffer_object *obj = MALLOC_STRUCT(intel_buffer_object);
+   struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object);

   _mesa_initialize_buffer_object(&obj->Base, name, target);

-   /* XXX:  We generate our own handle, which is different to 'name' above.
-    */
-   bmGenBuffers(intel, "bufferobj", 1, &obj->buffer, 6);
-   assert(obj->buffer);
-
   return &obj->Base;
 }

@ -66,14 +69,13 @@ static struct gl_buffer_object *intel_bufferobj_alloc( GLcontext *ctx,
 static void intel_bufferobj_free( GLcontext *ctx, 
 				  struct gl_buffer_object *obj )
 { 
-   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);

-   if (intel_obj->buffer) 
-      bmDeleteBuffers( intel, 1, &intel_obj->buffer );
-  
+   if (intel_obj->buffer)
+      dri_bo_unreference(intel_obj->buffer);
+
   _mesa_free(intel_obj);
 }

@ -103,7 +105,23 @@ static void intel_bufferobj_data( GLcontext *ctx,
   obj->Size = size;
   obj->Usage = usage;

-   bmBufferData(intel, intel_obj->buffer, size, data, 0);
+   /* While it would seem to make sense to always reallocate the buffer here,
+    * since it should allow us better concurrency between rendering and
+    * map-cpu write-unmap, doing so was a minor (~10%) performance loss
+    * for both classic and TTM mode with openarena.  That may change with
+    * improved buffer manager algorithms.
+    */
+   if (intel_obj->buffer != NULL && intel_obj->buffer->size != size) {
+      dri_bo_unreference(intel_obj->buffer);
+      intel_obj->buffer = NULL;
+   }
+   if (size != 0) {
+      if (intel_obj->buffer == NULL)
+	 intel_bufferobj_alloc_buffer(intel, intel_obj);
+
+      if (data != NULL)
+	 dri_bo_subdata(intel_obj->buffer, 0, size, data);
+   }
 }


@ -120,11 +138,10 @@ static void intel_bufferobj_subdata( GLcontext *ctx,
 				     const GLvoid * data,
 				     struct gl_buffer_object * obj )
 {
-   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);
-   bmBufferSubData(intel, intel_obj->buffer, offset, size, data);
+   dri_bo_subdata(intel_obj->buffer, offset, size, data);
 }


@ -138,11 +155,10 @@ static void intel_bufferobj_get_subdata( GLcontext *ctx,
 					 GLvoid * data,
 					 struct gl_buffer_object * obj )
 {
-   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);
-   bmBufferGetSubData(intel, intel_obj->buffer, offset, size, data);
+   dri_bo_get_subdata(intel_obj->buffer, offset, size, data);
 }


@ -155,14 +171,15 @@ static void *intel_bufferobj_map( GLcontext *ctx,
 				  GLenum access,
 				  struct gl_buffer_object *obj )
 {
-   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   /* XXX: Translate access to flags arg below:
    */
   assert(intel_obj);
   assert(intel_obj->buffer);
-   obj->Pointer = bmMapBuffer(intel, intel_obj->buffer, 0);
+
+   dri_bo_map(intel_obj->buffer, GL_TRUE);
+   obj->Pointer = intel_obj->buffer->virtual;
   return obj->Pointer;
 }

@ -174,18 +191,17 @@ static GLboolean intel_bufferobj_unmap( GLcontext *ctx,
 					GLenum target,
 					struct gl_buffer_object *obj )
 {
-   struct intel_context *intel = intel_context(ctx);
   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);

   assert(intel_obj);
   assert(intel_obj->buffer);
   assert(obj->Pointer);
-   bmUnmapBuffer(intel, intel_obj->buffer);
+   dri_bo_unmap(intel_obj->buffer);
   obj->Pointer = NULL;
   return GL_TRUE;
 }

-struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *intel_obj )
+dri_bo *intel_bufferobj_buffer( const struct intel_buffer_object *intel_obj )
 {
   assert(intel_obj->Base.Name);
   assert(intel_obj->buffer);
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.h
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.h
@ -39,13 +39,13 @@ struct gl_buffer_object;
 */
 struct intel_buffer_object {
   struct gl_buffer_object Base;
-   struct buffer *buffer;   /* the low-level buffer manager's buffer handle */
+   dri_bo *buffer;   /* the low-level buffer manager's buffer handle */
 };


 /* Get the bm buffer associated with a GL bufferobject:
 */
-struct buffer *intel_bufferobj_buffer( const struct intel_buffer_object *obj );
+dri_bo *intel_bufferobj_buffer( const struct intel_buffer_object *obj );

 /* Hook the bufferobject implementation into mesa: 
 */
--- a/src/mesa/drivers/dri/i965/intel_bufmgr_ttm.c
+++ b/src/mesa/drivers/dri/i965/intel_bufmgr_ttm.c
@ -0,0 +1 @@
+../intel/intel_bufmgr_ttm.c
--- a/src/mesa/drivers/dri/i965/intel_context.c
+++ b/src/mesa/drivers/dri/i965/intel_context.c
@ -58,8 +58,10 @@
 #include "intel_blit.h"
 #include "intel_regions.h"
 #include "intel_buffer_objects.h"
+#include "intel_decode.h"
+#include "intel_bufmgr_ttm.h"

-#include "bufmgr.h"
+#include "i915_drm.h"

 #include "utils.h"
 #include "vblank.h"
@ -267,14 +269,20 @@ void intelFlush( GLcontext *ctx )
 {
   struct intel_context *intel = intel_context( ctx );

-   bmLockAndFence(intel);
+   if (intel->batch->map != intel->batch->ptr)
+      intel_batchbuffer_flush(intel->batch);
 }

 void intelFinish( GLcontext *ctx ) 
 {
   struct intel_context *intel = intel_context( ctx );

-   bmFinishFence(intel, bmLockAndFence(intel));
+   intelFlush(ctx);
+   if (intel->batch->last_fence) {
+      dri_fence_wait(intel->batch->last_fence);
+      dri_fence_unreference(intel->batch->last_fence);
+      intel->batch->last_fence = NULL;
+   }
 }

 static void
@ -308,6 +316,82 @@ intelEndQuery(GLcontext *ctx, GLenum target, struct gl_query_object *q)
 	intel->stats_wm--;
 }

+/**
+ * Driver-specific fence emit implementation for the fake memory manager.
+ *
+ * \param private  the struct intel_context, passed as an opaque pointer.
+ * \return the value produced by intelEmitIrqLocked().
+ */
+static unsigned int
+intel_fence_emit(void *private)
+{
+   struct intel_context *intel = (struct intel_context *)private;
+
+   /* XXX: Need to emit a flush, if we haven't already (at least with the
+    * current batchbuffer implementation, we have).
+    */
+   return intelEmitIrqLocked(intel);
+}
+
+/**
+ * Driver-specific fence wait implementation for the fake memory manager.
+ *
+ * \param private  the struct intel_context, passed as an opaque pointer.
+ * \param cookie   value handed on to intelWaitIrq().
+ * \return always 0.
+ */
+static int
+intel_fence_wait(void *private, unsigned int cookie)
+{
+   intelWaitIrq((struct intel_context *)private, cookie);
+   return 0;
+}
+
+/**
+ * Selects and initializes the buffer manager for a context.
+ *
+ * TTM is attempted when the INTEL_NO_TTM environment variable is unset, the
+ * DDX minor version is >= 9, the DRM minor version is >= 11 and the screen
+ * supplies a front-buffer object handle.  If intel->bufmgr is still NULL
+ * afterwards, the classic ("fake") manager is created over the screen's
+ * texture pool, using intel_fence_emit/intel_fence_wait as its fence hooks.
+ *
+ * Sets intel->ttm and intel->bufmgr.  The fallback test relies on
+ * intel->bufmgr being NULL on entry.
+ *
+ * \return GL_TRUE if a buffer manager was set up, GL_FALSE otherwise.
+ */
+static GLboolean
+intel_init_bufmgr(struct intel_context *intel)
+{
+   intelScreenPrivate *intelScreen = intel->intelScreen;
+   GLboolean ttm_disable = getenv("INTEL_NO_TTM") != NULL;
+
+   /* If we've got a new enough DDX that's initializing TTM and giving us
+    * object handles for the shared buffers, use that.
+    */
+   intel->ttm = GL_FALSE;
+   if (!ttm_disable &&
+       intelScreen->driScrnPriv->ddx_version.minor >= 9 &&
+       intelScreen->drmMinor >= 11 &&
+       intelScreen->front.bo_handle != -1)
+   {
+      intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd,
+                                            DRM_FENCE_TYPE_EXE,
+                                            DRM_FENCE_TYPE_EXE |
+                                            DRM_I915_FENCE_TYPE_RW,
+                                            BATCH_SZ);
+      if (intel->bufmgr != NULL)
+         intel->ttm = GL_TRUE;
+   }
+   /* Otherwise, use the classic buffer manager. */
+   if (intel->bufmgr == NULL) {
+      if (ttm_disable) {
+         fprintf(stderr, "TTM buffer manager disabled.  Using classic.\n");
+      } else {
+         fprintf(stderr, "Failed to initialize TTM buffer manager.  "
+                 "Falling back to classic.\n");
+      }
+
+      /* The classic manager needs a texture pool to allocate from. */
+      if (intelScreen->tex.size == 0) {
+         fprintf(stderr, "[%s:%d] Error initializing buffer manager.\n",
+                 __func__, __LINE__);
+         return GL_FALSE;
+      }
+
+      intel->bufmgr = dri_bufmgr_fake_init(intelScreen->tex.offset,
+                                           intelScreen->tex.map,
+                                           intelScreen->tex.size,
+                                           intel_fence_emit,
+                                           intel_fence_wait,
+                                           intel);
+
+      /* Do not report success without a usable buffer manager. */
+      if (intel->bufmgr == NULL) {
+         fprintf(stderr, "[%s:%d] Error initializing buffer manager.\n",
+                 __func__, __LINE__);
+         return GL_FALSE;
+      }
+   }
+
+   return GL_TRUE;
+}
+

 void intelInitDriverFunctions( struct dd_function_table *functions )
 {
@ -333,8 +417,6 @@ void intelInitDriverFunctions( struct dd_function_table *functions )
   intelInitBufferFuncs( functions );
 }

-
-
 GLboolean intelInitContext( struct intel_context *intel,
 			    const __GLcontextModes *mesaVis,
 			    __DRIcontextPrivate *driContextPriv,
@ -361,6 +443,16 @@ GLboolean intelInitContext( struct intel_context *intel,
   intel->driScreen = sPriv;
   intel->sarea = saPriv;

+   /* Dri stuff */
+   intel->hHWContext = driContextPriv->hHWContext;
+   intel->driFd = sPriv->fd;
+   intel->driHwLock = (drmLock *) &sPriv->pSAREA->lock;
+
+   intel->maxBatchSize = BATCH_SZ;
+
+   if (!intel_init_bufmgr(intel))
+      return GL_FALSE;
+
   driParseConfigFiles (&intel->optionCache, &intelScreen->optionCache,
 		   intel->driScreen->myNum, "i965");

@ -408,11 +500,6 @@ GLboolean intelInitContext( struct intel_context *intel,
   _swrast_allow_pixel_fog( ctx, GL_FALSE );
   _swrast_allow_vertex_fog( ctx, GL_TRUE );

-   /* Dri stuff */
-   intel->hHWContext = driContextPriv->hHWContext;
-   intel->driFd = sPriv->fd;
-   intel->driHwLock = (drmLock *) &sPriv->pSAREA->lock;
-
   intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24;
   intel->hw_stipple = 1;

@ -439,8 +526,6 @@ GLboolean intelInitContext( struct intel_context *intel,
   /* Initialize swrast, tnl driver tables: */
   intelInitSpanFuncs( ctx );

-   intel->no_hw = getenv("INTEL_NO_HW") != NULL;
-
   if (!intel->intelScreen->irq_active) {
      _mesa_printf("IRQs not active.  Exiting\n");
      exit(1);
@ -449,62 +534,15 @@ GLboolean intelInitContext( struct intel_context *intel,

   INTEL_DEBUG  = driParseDebugString( getenv( "INTEL_DEBUG" ),
 				       debug_control );
+   if (!intel->ttm && (INTEL_DEBUG & DEBUG_BUFMGR))
+      dri_bufmgr_fake_set_debug(intel->bufmgr, GL_TRUE);

+   intel_recreate_static_regions(intel);

-   /* Buffer manager: 
-    */
-   intel->bm = bm_fake_intel_Attach( intel );
-
-
-   bmInitPool(intel,
-	      intel->intelScreen->tex.offset, /* low offset */
-	      intel->intelScreen->tex.map, /* low virtual */
-	      intel->intelScreen->tex.size,
-	      BM_MEM_AGP);
-
-   /* These are still static, but create regions for them.  
-    */
-   intel->front_region = 
-      intel_region_create_static(intel,
-				 BM_MEM_AGP,
-				 intelScreen->front.offset,
-				 intelScreen->front.map,
-				 intelScreen->cpp,
-				 intelScreen->front.pitch / intelScreen->cpp,
-				 intelScreen->height,
-				 intelScreen->front.size,
-				 intelScreen->front.tiled != 0);
-
-   intel->back_region = 
-      intel_region_create_static(intel,
-				 BM_MEM_AGP,
-				 intelScreen->back.offset,
-				 intelScreen->back.map,
-				 intelScreen->cpp,
-				 intelScreen->back.pitch / intelScreen->cpp,
-				 intelScreen->height,
-				 intelScreen->back.size,
-                                 intelScreen->back.tiled != 0);
-
-   /* Still assuming front.cpp == depth.cpp
-    *
-    * XXX: Setting tiling to false because Depth tiling only supports
-    * YMAJOR but the blitter only supports XMAJOR tiling.  Have to
-    * resolve later.
-    */
-   intel->depth_region = 
-      intel_region_create_static(intel,
-				 BM_MEM_AGP,
-				 intelScreen->depth.offset,
-				 intelScreen->depth.map,
-				 intelScreen->cpp,
-				 intelScreen->depth.pitch / intelScreen->cpp,
-				 intelScreen->height,
-				 intelScreen->depth.size,
-                                 intelScreen->depth.tiled != 0);
-   
   intel_bufferobj_init( intel );
   intel->batch = intel_batchbuffer_alloc( intel );
+   intel->last_swap_fence = NULL;
+   intel->first_swap_fence = NULL;

   if (intel->ctx.Mesa_DXTn) {
      _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
@ -519,12 +557,16 @@ GLboolean intelInitContext( struct intel_context *intel,
 /* 			  DRI_TEXMGR_DO_TEXTURE_2D |  */
 /* 			  DRI_TEXMGR_DO_TEXTURE_RECT ); */

-
+   /* Force all software fallbacks */
   if (getenv("INTEL_NO_RAST")) {
      fprintf(stderr, "disabling 3D rasterization\n");
      intel->no_rast = 1;
   }

+   /* Disable all hardware rendering (skip emitting batches and fences/waits
+    * to the kernel)
+    */
+   intel->no_hw = getenv("INTEL_NO_HW") != NULL;

   return GL_TRUE;
 }
@ -549,7 +591,17 @@ void intelDestroyContext(__DRIcontextPrivate *driContextPriv)
      intel->Fallback = 0;	/* don't call _swrast_Flush later */
      intel_batchbuffer_free(intel->batch);
      intel->batch = NULL;
-      
+
+      if (intel->last_swap_fence) {
+	 dri_fence_wait(intel->last_swap_fence);
+	 dri_fence_unreference(intel->last_swap_fence);
+	 intel->last_swap_fence = NULL;
+      }
+      if (intel->first_swap_fence) {
+	 dri_fence_wait(intel->first_swap_fence);
+	 dri_fence_unreference(intel->first_swap_fence);
+	 intel->first_swap_fence = NULL;
+      }

      if ( release_texture_heaps ) {
         /* This share group is about to go away, free our private
@ -628,7 +680,6 @@ static void intelContendedLock( struct intel_context *intel, GLuint flags )
   __DRIscreenPrivate *sPriv = intel->driScreen;
   volatile drmI830Sarea * sarea = intel->sarea;
   int me = intel->hHWContext;
-   int my_bufmgr = bmCtxId(intel);

   drmGetLock(intel->driFd, intel->hHWContext, flags);

@ -655,16 +706,20 @@ static void intelContendedLock( struct intel_context *intel, GLuint flags )
      intel->vtbl.lost_hardware( intel );
   }

-   /* As above, but don't evict the texture data on transitions
-    * between contexts which all share a local buffer manager.
+   /* If the last consumer of the texture memory wasn't us, notify the fake
+    * bufmgr and record the new owner.  We should have the memory shared
+    * between contexts of a single fake bufmgr, but this will at least make
+    * things correct for now.
    */
-   if (sarea->texAge != my_bufmgr) {
+   if (!intel->ttm && sarea->texAge != intel->hHWContext) {
+      sarea->texAge = intel->hHWContext;
+      dri_bufmgr_fake_contended_lock_take(intel->bufmgr);
+      if (INTEL_DEBUG & DEBUG_BATCH)
+	 intel_decode_context_reset();
      if (INTEL_DEBUG & DEBUG_BUFMGR) {
-	 fprintf(stderr, "Lost Textures: sarea->texAge %x my_bufmgr %x\n",
-		 sarea->ctxOwner, my_bufmgr);
+	 fprintf(stderr, "Lost Textures: sarea->texAge %x hw context %x\n",
+		 sarea->ctxOwner, intel->hHWContext);
      }
-      sarea->texAge = my_bufmgr;
-      bm_fake_NotifyContendedLockTake( intel ); 
   }

   /* Drawable changed?
@ -694,29 +749,6 @@ void LOCK_HARDWARE( struct intel_context *intel )

   intel->locked = 1;

-   if (bmError(intel)) {
-      bmEvictAll(intel);
-      intel->vtbl.lost_hardware( intel );
-   }
-
-   /* Make sure nothing has been emitted prior to getting the lock:
-    */
-   assert(intel->batch->map == 0);
-
-   /* XXX: postpone, may not be needed:
-    */
-   if (!intel_batchbuffer_map(intel->batch)) {
-      bmEvictAll(intel);
-      intel->vtbl.lost_hardware( intel );
-
-      /* This could only fail if the batchbuffer was greater in size
-       * than the available texture memory:
-       */
-      if (!intel_batchbuffer_map(intel->batch)) {
-	 _mesa_printf("double failure to map batchbuffer\n");
-	 assert(0);
-      }
-   }
 }
 
  
@ -724,11 +756,6 @@ void LOCK_HARDWARE( struct intel_context *intel )
 */
 void UNLOCK_HARDWARE( struct intel_context *intel )
 {
-   /* Make sure everything has been released: 
-    */
-   assert(intel->batch->ptr == intel->batch->map + intel->batch->offset);
-
-   intel_batchbuffer_unmap(intel->batch);
   intel->vtbl.note_unlock( intel );
   intel->locked = 0;

--- a/src/mesa/drivers/dri/i965/intel_context.h
+++ b/src/mesa/drivers/dri/i965/intel_context.h
@ -151,20 +151,34 @@ struct intel_context
 			     GLfloat s0, GLfloat s1,
 			     GLfloat t0, GLfloat t1);

-
-
+      void (*debug_batch)(struct intel_context *intel);
   } vtbl;

   GLint refcount;   
   GLuint Fallback;
   GLuint NewGLState;
-   
-   GLuint last_swap_fence;
-   GLuint second_last_swap_fence;
-   
+
+   dri_bufmgr *bufmgr;
+   unsigned int maxBatchSize;
+
+   struct intel_region *front_region;
+   struct intel_region *back_region;
+   struct intel_region *third_region;
+   struct intel_region *depth_region;
+
+   /**
+    * This value indicates that the kernel memory manager is being used
+    * instead of the fake client-side memory manager.
+    */
+   GLboolean ttm;
+
+   dri_fence *first_swap_fence;
+   dri_fence *last_swap_fence;
+
   GLuint stats_wm;

   struct intel_batchbuffer *batch;
+   unsigned batch_id;

   GLubyte clear_chan[4];
   GLuint ClearColor;
@ -178,29 +192,17 @@ struct intel_context
   GLboolean hw_stencil;
   GLboolean hw_stipple;
   GLboolean depth_buffer_is_float;
-   GLboolean no_hw;
   GLboolean no_rast;
-   GLboolean thrashing;
   GLboolean locked;
   GLboolean strict_conformance;
   GLboolean need_flush;

-
-   
-   /* AGP memory buffer manager:
-    */
-   struct bufmgr *bm;
-
-
   /* State for intelvb.c and inteltris.c.
    */
   GLenum render_primitive;
   GLenum reduced_primitive;

-   struct intel_region *front_region;
-   struct intel_region *back_region;
   struct intel_region *draw_region;
-   struct intel_region *depth_region;

   /* These refer to the current draw (front vs. back) buffer:
    */
@ -226,6 +228,8 @@ struct intel_context

   GLuint lastStamp;

+   GLboolean no_hw;
+
   /**
    * Configuration cache
    */
--- a/src/mesa/drivers/dri/i965/intel_ioctl.c
+++ b/src/mesa/drivers/dri/i965/intel_ioctl.c
@ -41,27 +41,19 @@
 #include "intel_blit.h"
 #include "intel_regions.h"
 #include "drm.h"
-#include "bufmgr.h"
+#include "dri_bufmgr.h"
+#include "intel_bufmgr_ttm.h"
+#include "i915_drm.h"

-static int intelWaitIdleLocked( struct intel_context *intel )
+static void intelWaitIdleLocked( struct intel_context *intel )
 {
-   static int in_wait_idle = 0;
   unsigned int fence;

-   if (!in_wait_idle) {
-      if (INTEL_DEBUG & DEBUG_SYNC) {
-	 fprintf(stderr, "waiting for idle\n");
-      }
+   if (INTEL_DEBUG & DEBUG_SYNC)
+      fprintf(stderr, "waiting for idle\n");

-      in_wait_idle = 1;
-      fence = bmSetFence(intel);
-      intelWaitIrq(intel, fence);
-      in_wait_idle = 0;
-
-      return bmTestFence(intel, fence);
-   } else {
-      return 1;
-   }
+   fence = intelEmitIrqLocked(intel);
+   intelWaitIrq(intel, fence);
 }

 int intelEmitIrqLocked( struct intel_context *intel )
@ -71,13 +63,14 @@ int intelEmitIrqLocked( struct intel_context *intel )
   if (!intel->no_hw) {
      drmI830IrqEmit ie;
      int ret;
-      
+      /*
      assert(((*(int *)intel->driHwLock) & ~DRM_LOCK_CONT) == 
 	     (DRM_LOCK_HELD|intel->hHWContext));
-
+      */
      ie.irq_seq = &seq;

-      ret = drmCommandWriteRead( intel->driFd, DRM_I830_IRQ_EMIT, 
+      ret = drmCommandWriteRead( intel->driFd,
+				 DRM_I830_IRQ_EMIT, 
 				 &ie, sizeof(ie) );
      if ( ret ) {
 	 fprintf( stderr, "%s: drmI830IrqEmit: %d\n", __FUNCTION__, ret );
@ -96,21 +89,25 @@ void intelWaitIrq( struct intel_context *intel, int seq )
   if (!intel->no_hw) {
      drmI830IrqWait iw;
      int ret, lastdispatch;
-      
+      volatile drmI830Sarea *sarea = intel->sarea;
+
      if (0)
 	 fprintf(stderr, "%s %d\n", __FUNCTION__, seq );

      iw.irq_seq = seq;
 	
      do {
-	 lastdispatch = intel->sarea->last_dispatch;
-	 ret = drmCommandWrite( intel->driFd, DRM_I830_IRQ_WAIT, &iw, sizeof(iw) );
+	 lastdispatch = sarea->last_dispatch;
+	 ret = drmCommandWrite( intel->driFd,
+				DRM_I830_IRQ_WAIT, &iw, sizeof(iw) );

 	 /* This seems quite often to return before it should!?! 
 	  */
-      } while (ret == -EAGAIN || ret == -EINTR || (ret == -EBUSY && lastdispatch != intel->sarea->last_dispatch) || (ret == 0 && seq > intel->sarea->last_dispatch)
-	       || (ret == 0 && intel->sarea->last_dispatch - seq >= (1 << 24)));
-      
+      } while (ret == -EAGAIN ||
+	       ret == -EINTR ||
+	       (ret == -EBUSY && lastdispatch != sarea->last_dispatch) ||
+	       (ret == 0 && seq > sarea->last_dispatch) ||
+	       (ret == 0 && sarea->last_dispatch - seq >= (1 << 24)));

      if ( ret ) {
 	 fprintf( stderr, "%s: drmI830IrqWait: %d\n", __FUNCTION__, ret );
@ -123,7 +120,9 @@ void intelWaitIrq( struct intel_context *intel, int seq )

 void intel_batch_ioctl( struct intel_context *intel, 
 			GLuint start_offset,
-			GLuint used)
+			GLuint used,
+			GLboolean ignore_cliprects,
+			GLboolean allow_unlock )
 {
   drmI830BatchBuffer batch;

@ -157,45 +156,61 @@ void intel_batch_ioctl( struct intel_context *intel,
 	 UNLOCK_HARDWARE(intel);
 	 exit(1);
      }
-
-      if (INTEL_DEBUG & DEBUG_SYNC) {
-	intelWaitIdleLocked(intel);
-      }
   }
 }

-void intel_cmd_ioctl( struct intel_context *intel, 
-		      char *buf,
-		      GLuint used)
+void
+intel_exec_ioctl(struct intel_context *intel,
+		 GLuint used,
+		 GLboolean ignore_cliprects, GLboolean allow_unlock,
+		 void *start, GLuint count, dri_fence **fence)
 {
-   drmI830CmdBuffer cmd;
+   struct drm_i915_execbuffer execbuf;
+   dri_fence *fo;

   assert(intel->locked);
   assert(used);

-   cmd.buf = buf;
-   cmd.sz = used;
-   cmd.cliprects = intel->pClipRects;
-   cmd.num_cliprects = 0;
-   cmd.DR1 = 0;
-   cmd.DR4 = 0;
-      
-   if (INTEL_DEBUG & DEBUG_DMA)
-      fprintf(stderr, "%s: 0x%x..0x%x\n",
-	      __FUNCTION__, 
-	      0, 
-	      0 + cmd.sz);
-
-   if (!intel->no_hw) {
-      if (drmCommandWrite (intel->driFd, DRM_I830_CMDBUFFER, &cmd, 
-			   sizeof(cmd))) {
-	 fprintf(stderr, "DRM_I830_CMDBUFFER: %d\n",  -errno);
-	 UNLOCK_HARDWARE(intel);
-	 exit(1);
-      }
-
-      if (INTEL_DEBUG & DEBUG_SYNC) {
-	intelWaitIdleLocked(intel);
-      }
+   if (*fence) {
+     dri_fence_unreference(*fence);
   }
+
+   memset(&execbuf, 0, sizeof(execbuf));
+
+   execbuf.num_buffers = count;
+   execbuf.batch.used = used;
+   execbuf.batch.cliprects = intel->pClipRects;
+   execbuf.batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects;
+   execbuf.batch.DR1 = 0;
+   execbuf.batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) |
+			(((GLuint) intel->drawY) << 16));
+
+   execbuf.ops_list = (unsigned)start; // TODO
+   execbuf.fence_arg.flags = DRM_FENCE_FLAG_SHAREABLE | DRM_I915_FENCE_FLAG_FLUSHED;
+
+   if (intel->no_hw)
+      return;
+
+   if (drmCommandWriteRead(intel->driFd, DRM_I915_EXECBUFFER, &execbuf,
+                       sizeof(execbuf))) {
+      fprintf(stderr, "DRM_I830_EXECBUFFER: %d\n", -errno);
+      UNLOCK_HARDWARE(intel);
+      exit(1);
+   }
+
+
+   fo = intel_ttm_fence_create_from_arg(intel->bufmgr, "fence buffers",
+					&execbuf.fence_arg);
+   if (!fo) {
+      fprintf(stderr, "failed to fence handle: %08x\n", execbuf.fence_arg.handle);
+      UNLOCK_HARDWARE(intel);
+      exit(1);
+   }
+   *fence = fo;
+
+   /* FIXME: use hardware contexts to avoid 'losing' hardware after
+    * each buffer flush.
+    */
+   intel->vtbl.lost_hardware(intel);
+
 }
--- a/src/mesa/drivers/dri/i965/intel_ioctl.h
+++ b/src/mesa/drivers/dri/i965/intel_ioctl.h
@ -35,10 +35,12 @@ int intelEmitIrqLocked( struct intel_context *intel );

 void intel_batch_ioctl( struct intel_context *intel, 
 			GLuint start_offset,
-			GLuint used);
-
-void intel_cmd_ioctl( struct intel_context *intel, 
-		      char *buf,
-		      GLuint used);
+			GLuint used,
+			GLboolean ignore_cliprects,
+			GLboolean allow_unlock );
+void intel_exec_ioctl(struct intel_context *intel,
+		      GLuint used,
+		      GLboolean ignore_cliprects, GLboolean allow_unlock,
+		      void *start, GLuint count, dri_fence **fence);

 #endif
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@ -28,7 +28,7 @@
 #include "intel_context.h"
 #include "intel_mipmap_tree.h"
 #include "intel_regions.h"
-#include "bufmgr.h"
+#include "dri_bufmgr.h"
 #include "enums.h"
 #include "imports.h"

--- a/src/mesa/drivers/dri/i965/intel_regions.c
+++ b/src/mesa/drivers/dri/i965/intel_regions.c
@ -42,7 +42,8 @@
 #include "intel_context.h"
 #include "intel_regions.h"
 #include "intel_blit.h"
-#include "bufmgr.h"
+#include "dri_bufmgr.h"
+#include "intel_bufmgr_ttm.h"
 #include "imports.h"

 #define FILE_DEBUG_FLAG DEBUG_REGION
@ -53,9 +54,8 @@ GLubyte *intel_region_map(struct intel_context *intel, struct intel_region *regi
 {
   DBG("%s\n", __FUNCTION__);
   if (!region->map_refcount++) {
-      region->map = bmMapBuffer(intel, region->buffer, 0);
-      if (!region->map)
-	 region->map_refcount--;
+      dri_bo_map(region->buffer, GL_TRUE);
+      region->map = region->buffer->virtual;
   }

   return region->map;
@ -66,7 +66,7 @@ void intel_region_unmap(struct intel_context *intel,
 {
   DBG("%s\n", __FUNCTION__);
   if (!--region->map_refcount) {
-      bmUnmapBuffer(intel, region->buffer);
+      dri_bo_unmap(region->buffer);
      region->map = NULL;
   }
 }
@ -86,8 +86,8 @@ struct intel_region *intel_region_alloc( struct intel_context *intel,
   region->height = height; 	/* needed? */
   region->refcount = 1;

-   bmGenBuffers(intel, "tex", 1, &region->buffer, 6);
-   bmBufferData(intel, region->buffer, pitch * cpp * height, NULL, 0);
+   region->buffer = dri_bo_alloc(intel->bufmgr, "region",
+				 pitch * cpp * height, 64, DRM_BO_FLAG_MEM_TT);

   return region;
 }
@ -110,56 +110,12 @@ void intel_region_release( struct intel_context *intel,
   
   if (--(*region)->refcount == 0) {
      assert((*region)->map_refcount == 0);
-      bmDeleteBuffers(intel, 1, &(*region)->buffer);
+      dri_bo_unreference((*region)->buffer);
      free(*region);
   }
   *region = NULL;
 }

-
-struct intel_region *intel_region_create_static( struct intel_context *intel, 
-						 GLuint mem_type,
-						 GLuint offset,
-						 void *virtual,
-						 GLuint cpp,
-						 GLuint pitch, 
-						 GLuint height,
-						 GLuint size,
-						 GLboolean tiled )
-{
-   struct intel_region *region = calloc(sizeof(*region), 1);
-   GLint pool;
-
-   DBG("%s\n", __FUNCTION__);
-
-   region->cpp = cpp;
-   region->pitch = pitch;
-   region->height = height; 	/* needed? */
-   region->refcount = 1;
-   region->tiled = tiled;
-
-   /* Recipe for creating a static buffer - create a static pool with
-    * the right offset and size, generate a buffer and use a special
-    * call to bind it to all of the memory in that pool.
-    */
-   pool = bmInitPool(intel, offset, virtual, size, 
-		     (BM_MEM_AGP |
-		      BM_NO_UPLOAD | 
-		      BM_NO_EVICT | 
-		      BM_NO_MOVE));
-   if (pool < 0) {
-      _mesa_printf("bmInitPool failed for static region\n");
-      exit(1);
-   }
-
-   region->buffer = bmGenBufferStatic(intel, pool);
-
-   return region;
-}
-
-
-
-
 void _mesa_copy_rect( GLubyte *dst,
 		      GLuint cpp,
 		      GLuint dst_pitch,
@ -212,41 +168,17 @@ GLboolean intel_region_data(struct intel_context *intel,
 {
   DBG("%s\n", __FUNCTION__);

-   if (width == dst->pitch && 
-       width == src_pitch &&
-       dst_offset == 0 &&
-       height == dst->height &&
-       srcx == 0 &&
-       srcy == 0) 
-   {
-      return (bmBufferData(intel,
-			   dst->buffer,
-			   dst->cpp * width * dst->height,
-			   src, 0) == 0);
-   }
-   else {
-      GLubyte *map = intel_region_map(intel, dst);
+   assert (dst_offset + dstx + width +
+	   (dsty + height - 1) * dst->pitch * dst->cpp <=
+	   dst->pitch * dst->cpp * dst->height);

-      if (map) {
-	 assert (dst_offset + dstx + width + 
-		 (dsty + height - 1) * dst->pitch * dst->cpp <= 
-		 dst->pitch * dst->cpp * dst->height);
-	 
-	 _mesa_copy_rect(map + dst_offset,
-			 dst->cpp,
-			 dst->pitch,
-			 dstx, dsty,
-			 width, height,
-			 src,
-			 src_pitch,
-			 srcx, srcy);      
-	 
-	 intel_region_unmap(intel, dst);
-	 return GL_TRUE;
-      }
-      else 
-	 return GL_FALSE;
-   }
+   _mesa_copy_rect(intel_region_map(intel, dst) + dst_offset,
+                   dst->cpp,
+                   dst->pitch,
+                   dstx, dsty, width, height, src, src_pitch, srcx, srcy);
+   intel_region_unmap(intel, dst);
+
+   return GL_TRUE;
 }
 			  
 /* Copy rectangular sub-regions. Need better logic about when to
@ -295,3 +227,79 @@ void intel_region_fill( struct intel_context *intel,
 		     color );
 }

+/**
+ * Create, or refresh in place, the intel_region that wraps one of the
+ * statically allocated screen buffers.
+ *
+ * \param intel        context supplying the buffer manager, the ttm flag and
+ *                     the screen description
+ * \param name         debug name passed through to the buffer manager
+ * \param region       region to refresh, or NULL to allocate a fresh one
+ *                     (a fresh region starts with a refcount of 1)
+ * \param region_desc  screen-level description of the buffer (offset, pitch,
+ *                     map, bo_handle, tiled)
+ * \param mem_type     not consulted by this function; the non-TTM path always
+ *                     requests DRM_BO_FLAG_MEM_TT
+ *
+ * \return the (possibly newly allocated) region, or NULL if a new region
+ *         could not be allocated.
+ */
+static struct intel_region *
+intel_recreate_static(struct intel_context *intel,
+		      const char *name,
+		      struct intel_region *region,
+		      intelRegion *region_desc,
+		      GLuint mem_type)
+{
+   intelScreenPrivate *intelScreen = intel->intelScreen;
+
+   if (region == NULL) {
+      region = calloc(1, sizeof(*region));
+      if (region == NULL)
+	 return NULL;
+      region->refcount = 1;
+   }
+
+   region->cpp = intelScreen->cpp;
+   region->pitch = region_desc->pitch / intelScreen->cpp;
+   region->height = intelScreen->height;     /* needed? */
+   region->tiled = region_desc->tiled;
+
+   /* When refreshing an existing region, drop the reference to the buffer
+    * it currently wraps before replacing it; otherwise that buffer object
+    * is leaked every time the static regions are recreated.
+    */
+   if (region->buffer != NULL) {
+      dri_bo_unreference(region->buffer);
+      region->buffer = NULL;
+   }
+
+   if (intel->ttm) {
+      /* Kernel memory manager: wrap the buffer object handle published
+       * in the screen description.
+       */
+      assert(region_desc->bo_handle != -1);
+      region->buffer = intel_ttm_bo_create_from_handle(intel->bufmgr,
+						       name,
+						       region_desc->bo_handle);
+   } else {
+      /* Fake bufmgr: describe the existing allocation at the given offset
+       * and mapping, sized as pitch * screen height.
+       */
+      region->buffer = dri_bo_alloc_static(intel->bufmgr,
+					   name,
+					   region_desc->offset,
+					   region_desc->pitch *
+					   intelScreen->height,
+					   region_desc->map,
+					   DRM_BO_FLAG_MEM_TT);
+   }
+
+   assert(region->buffer != NULL);
+
+   return region;
+}
+
+/**
+ * Build (or refresh) the intel_region wrappers for the static front, back
+ * and depth buffers created by the xserver.
+ *
+ * FBOs mean these are no longer the render targets in every circumstance,
+ * but they will not go away until the back and depth buffers become
+ * private, and the front buffer will remain even then.
+ *
+ * No video memory is allocated here; the regions only describe
+ * allocations already made by the X server.
+ */
+void
+intel_recreate_static_regions(struct intel_context *intel)
+{
+   intelScreenPrivate *screen = intel->intelScreen;
+   const GLuint mem_type = DRM_BO_FLAG_MEM_TT;
+
+   intel->front_region = intel_recreate_static(intel, "front",
+					       intel->front_region,
+					       &screen->front, mem_type);
+
+   intel->back_region = intel_recreate_static(intel, "back",
+					      intel->back_region,
+					      &screen->back, mem_type);
+
+   /* The depth region still assumes front.cpp == depth.cpp.  That
+    * assumption can be dropped once we move to private buffers.
+    */
+   intel->depth_region = intel_recreate_static(intel, "depth",
+					       intel->depth_region,
+					       &screen->depth, mem_type);
+}
--- a/src/mesa/drivers/dri/i965/intel_regions.h
+++ b/src/mesa/drivers/dri/i965/intel_regions.h
@ -29,7 +29,8 @@
 #define INTEL_REGIONS_H

 #include "mtypes.h"
-#include "bufmgr.h"		/* for DBG! */
+#include "dri_bufmgr.h"		/* for DBG! */
+#include "intel_screen.h"
 struct intel_context;

 /* A layer on top of the bufmgr buffers that adds a few useful things:
@ -40,7 +41,7 @@ struct intel_context;
 * - Blitter commands for copying 2D regions between buffers.
 */
 struct intel_region {
-   struct buffer *buffer;
+   dri_bo *buffer;
   GLuint refcount;
   GLuint cpp;
   GLuint pitch;
@ -66,20 +67,31 @@ void intel_region_reference( struct intel_region **dst,
 void intel_region_release(struct intel_context *intel,
 			  struct intel_region **ib );

+void intel_recreate_static_regions(struct intel_context *intel);
+
 /* Static regions may be tiled.  The assumption is that the X server
 * has set up fence registers to define tiled zones in agp and these
 * buffers are within those zones.  Tiling regions without fence
 * registers is more work.
 */
-struct intel_region *intel_region_create_static( struct intel_context *intel,
-						 GLuint mem_type,
-						 GLuint offset,
-						 void *virtual,
-						 GLuint cpp,
-						 GLuint pitch,
-						 GLuint height,
-						 GLuint size,
-						 GLboolean tiled );
+struct intel_region *
+intel_region_create_static(intelScreenPrivate *intelScreen,
+			   char *name,
+			   GLuint mem_type,
+			   unsigned int bo_handle,
+			   GLuint offset,
+			   void *virtual,
+			   GLuint cpp,
+			   GLuint pitch, GLuint height, GLboolean tiled);
+void
+intel_region_update_static(intelScreenPrivate *intelScreen,
+			   struct intel_region *region,
+			   GLuint mem_type,
+			   unsigned int bo_handle,
+			   GLuint offset,
+			   void *virtual,
+			   GLuint cpp, GLuint pitch, GLuint height,
+			   GLboolean tiled);

 /* Map/unmap regions.  This is refcounted also: 
 */
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@ -42,7 +42,10 @@
 #include "intel_tex.h"
 #include "intel_span.h"
 #include "intel_ioctl.h"
+#include "intel_regions.h"
+#include "intel_bufmgr_ttm.h"

+#include "i915_drm.h"
 #include "i830_dri.h"

 PUBLIC const char __driConfigOptions[] =
@ -126,7 +129,6 @@ intelMapScreenRegions(__DRIscreenPrivate *sPriv)
   return GL_TRUE;
 }

-
 void
 intelUnmapScreenRegions(intelScreenPrivate *intelScreen)
 {
@ -243,6 +245,16 @@ intelUpdateScreenFromSAREA(intelScreenPrivate *intelScreen,
   intelScreen->depth.size = sarea->depth_size;
   intelScreen->depth.tiled = sarea->depth_tiled;

+   if (intelScreen->driScrnPriv->ddx_version.minor >= 9) {
+      intelScreen->front.bo_handle = sarea->front_bo_handle;
+      intelScreen->back.bo_handle = sarea->back_bo_handle;
+      intelScreen->depth.bo_handle = sarea->depth_bo_handle;
+   } else {
+      intelScreen->front.bo_handle = -1;
+      intelScreen->back.bo_handle = -1;
+      intelScreen->depth.bo_handle = -1;
+   }
+
   intelScreen->tex.offset = sarea->tex_offset;
   intelScreen->logTextureGranularity = sarea->log_tex_granularity;
   intelScreen->tex.handle = sarea->tex_handle;
@ -357,7 +369,7 @@ static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv)
   }

   sPriv->extensions = intelExtensions;
-   
+
   return GL_TRUE;
 }

--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@ -30,6 +30,7 @@

 #include <sys/time.h>
 #include "dri_util.h"
+#include "dri_bufmgr.h"
 #include "xmlconfig.h"
 #include "i830_common.h"

@ -42,6 +43,7 @@ typedef struct {
   char *map;           /* memory map */
   int offset;          /* from start of video mem, in bytes */
   int pitch;           /* row stride, in pixels */
+   unsigned int bo_handle;
   unsigned int tiled; 
 } intelRegion;

@ -52,7 +54,7 @@ typedef struct
   intelRegion rotated;
   intelRegion depth;
   intelRegion tex;
-   
+
   int deviceID;
   int width;
   int height;
--- a/src/mesa/drivers/dri/i965/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c
@ -31,7 +31,7 @@
 #include "intel_context.h"
 #include "intel_mipmap_tree.h"
 #include "intel_tex.h"
-#include "bufmgr.h"
+#include "dri_bufmgr.h"

 /**
 * Compute which mipmap levels that really need to be sent to the hardware.
@ -116,11 +116,13 @@ static void intel_texture_invalidate( struct intel_texture_object *intelObj )
      intelObj->dirty_images[face] = ~0;
 }

+#if 0
 static void intel_texture_invalidate_cb( struct intel_context *intel,
 					 void *ptr )
 {
   intel_texture_invalidate( (struct intel_texture_object *) ptr );
 }
+#endif

 /*  
 */
@ -207,7 +209,7 @@ GLuint intel_finalize_mipmap_tree( struct intel_context *intel,
 					  firstImage->Depth,
 					  cpp,
 					  firstImage->IsCompressed);
-
+#if 0
      /* Tell the buffer manager that we will manage the backing
       * store, but we still want it to do fencing for us.
       */
@ -216,6 +218,7 @@ GLuint intel_finalize_mipmap_tree( struct intel_context *intel,
 			      intel_texture_invalidate_cb,
 			      intelObj,
 			      GL_FALSE);
+#endif
   }

   /* Pull in any images not in the object's tree:
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@ -78,15 +78,15 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
      batch->buf = NULL;
   }

-   batch->buf = dri_bo_alloc(intel->intelScreen->bufmgr, "batchbuffer",
-			     intel->intelScreen->maxBatchSize, 4096,
+   batch->buf = dri_bo_alloc(intel->bufmgr, "batchbuffer",
+			     intel->maxBatchSize, 4096,
 			     DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED);
   dri_bo_map(batch->buf, GL_TRUE);
   batch->map = batch->buf->virtual;
-   batch->size = intel->intelScreen->maxBatchSize;
+   batch->size = intel->maxBatchSize;
   batch->ptr = batch->map;
   batch->dirty_state = ~0;
-   batch->id = batch->intel->intelScreen->batch_id++;
+   batch->id = batch->intel->batch_id++;
 }

 struct intel_batchbuffer *
@ -144,7 +144,7 @@ do_flush_locked(struct intel_batchbuffer *batch,
    */

   if (!(intel->numClipRects == 0 && !ignore_cliprects)) {
-      if (intel->intelScreen->ttm == GL_TRUE) {
+      if (intel->ttm == GL_TRUE) {
 	 intel_exec_ioctl(batch->intel,
 			  used, ignore_cliprects, allow_unlock,
 			  start, count, &batch->last_fence);
@ -175,6 +175,9 @@ do_flush_locked(struct intel_batchbuffer *batch,
      intel_decode(batch->buf->virtual, used / 4, batch->buf->offset,
 		   intel->intelScreen->deviceID);
      dri_bo_unmap(batch->buf);
+
+      if (intel->vtbl.debug_batch != NULL)
+	 intel->vtbl.debug_batch(intel);
   }
 }

@ -243,7 +246,12 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
                             GLuint flags, GLuint delta)
 {
   dri_emit_reloc(batch->buf, flags, delta, batch->ptr - batch->map, buffer);
-   batch->ptr += 4;
+   /*
+    * Using the old buffer offset, write in what the right data would be, in case
+    * the buffer doesn't move and we can short-circuit the relocation processing
+    * in the kernel
+    */
+   intel_batchbuffer_emit_dword (batch, buffer->offset + delta);

   return GL_TRUE;
 }
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
@ -95,7 +95,6 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
 #define BATCH_LOCALS

 #define BEGIN_BATCH(n, flags) do {				\
-   assert(!intel->prim.flush);					\
   intel_batchbuffer_require_space(intel->batch, (n)*4, flags);	\
 } while (0)

--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@ -459,7 +459,7 @@ intelClearWithBlit(GLcontext * ctx, GLbitfield mask)
               struct intel_region *irb_region =
 		  intel_get_rb_region(fb, buf);
               dri_bo *write_buffer =
-                  intel_region_buffer(intel->intelScreen, irb_region,
+                  intel_region_buffer(intel, irb_region,
                                      all ? INTEL_WRITE_FULL :
                                      INTEL_WRITE_PART);

--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@ -40,7 +40,7 @@ static void
 intel_bufferobj_alloc_buffer(struct intel_context *intel,
 			     struct intel_buffer_object *intel_obj)
 {
-   intel_obj->buffer = dri_bo_alloc(intel->intelScreen->bufmgr, "bufferobj",
+   intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj",
 				    intel_obj->Base.Size, 64,
 				    DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED);
 }
@ -85,7 +85,7 @@ intel_bufferobj_cow(struct intel_context *intel,
                    struct intel_buffer_object *intel_obj)
 {
   assert(intel_obj->region);
-   intel_region_cow(intel->intelScreen, intel_obj->region);
+   intel_region_cow(intel, intel_obj->region);
 }


--- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
+++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
@ -193,6 +193,10 @@ intel_setup_validate_list(dri_bufmgr_ttm *bufmgr_ttm, GLuint *count_p)
 	req->op = drm_bo_validate;
 	req->bo_req.flags = node->flags;
 	req->bo_req.hint = 0;
+#ifdef DRM_BO_HINT_PRESUMED_OFFSET
+	req->bo_req.hint |= DRM_BO_HINT_PRESUMED_OFFSET;
+	req->bo_req.presumed_offset = ((dri_bo *) node->priv)->offset;
+#endif
 	req->bo_req.mask = node->mask;
 	req->bo_req.fence_class = 0; /* Backwards compat. */
 	arg->reloc_handle = 0;
@ -556,6 +560,8 @@ intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,

    ret = drmBOReference(ttm_bufmgr->fd, handle, &ttm_buf->drm_bo);
    if (ret != 0) {
+       fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
+	       name, handle, strerror(-ret));
 	free(ttm_buf);
 	return NULL;
    }
@ -750,7 +756,7 @@ dri_ttm_fence_wait(dri_fence *fence)
    int ret;

    _glthread_LOCK_MUTEX(bufmgr_ttm->mutex);
-    ret = drmFenceWait(bufmgr_ttm->fd, 0, &fence_ttm->drm_fence, 0);
+    ret = drmFenceWait(bufmgr_ttm->fd, DRM_FENCE_FLAG_WAIT_LAZY, &fence_ttm->drm_fence, 0);
    _glthread_UNLOCK_MUTEX(bufmgr_ttm->mutex);
    if (ret != 0) {
 	_mesa_printf("%s:%d: Error %d waiting for fence %s.\n",
@ -838,11 +844,29 @@ dri_ttm_process_reloc(dri_bo *batch_buf, GLuint *count)
    return ptr;
 }

+/*
+ * Walk the buffer manager's buffer list and copy the offset found in each
+ * node's reply block (bo_arg.d.rep.bo_info.offset) into the dri_bo that the
+ * node refers to through its priv pointer.
+ */
+static void
+intel_update_buffer_offsets (dri_bufmgr_ttm *bufmgr_ttm)
+{
+    drmMMListHead *sentinel = &bufmgr_ttm->list.list;
+    drmMMListHead *cur = sentinel->next;
+
+    while (cur != sentinel) {
+	struct intel_bo_node *entry =
+	    DRMLISTENTRY(struct intel_bo_node, cur, head);
+	dri_bo *bo = (dri_bo *) entry->priv;
+
+	bo->offset = entry->bo_arg.d.rep.bo_info.offset;
+	cur = cur->next;
+    }
+}
+
 static void
 dri_ttm_post_submit(dri_bo *batch_buf, dri_fence **last_fence)
 {
    dri_bufmgr_ttm *bufmgr_ttm = (dri_bufmgr_ttm *)batch_buf->bufmgr;

+    intel_update_buffer_offsets (bufmgr_ttm);
    intel_free_validate_list(bufmgr_ttm);
    intel_free_reloc_list(bufmgr_ttm);

--- a/src/mesa/drivers/dri/intel/intel_depthstencil.c
+++ b/src/mesa/drivers/dri/intel/intel_depthstencil.c
@ -95,12 +95,12 @@ map_regions(GLcontext * ctx,
 {
   struct intel_context *intel = intel_context(ctx);
   if (depthRb && depthRb->region) {
-      intel_region_map(intel->intelScreen, depthRb->region);
+      intel_region_map(intel, depthRb->region);
      depthRb->pfMap = depthRb->region->map;
      depthRb->pfPitch = depthRb->region->pitch;
   }
   if (stencilRb && stencilRb->region) {
-      intel_region_map(intel->intelScreen, stencilRb->region);
+      intel_region_map(intel, stencilRb->region);
      stencilRb->pfMap = stencilRb->region->map;
      stencilRb->pfPitch = stencilRb->region->pitch;
   }
@ -113,12 +113,12 @@ unmap_regions(GLcontext * ctx,
 {
   struct intel_context *intel = intel_context(ctx);
   if (depthRb && depthRb->region) {
-      intel_region_unmap(intel->intelScreen, depthRb->region);
+      intel_region_unmap(intel, depthRb->region);
      depthRb->pfMap = NULL;
      depthRb->pfPitch = 0;
   }
   if (stencilRb && stencilRb->region) {
-      intel_region_unmap(intel->intelScreen, stencilRb->region);
+      intel_region_unmap(intel, stencilRb->region);
      stencilRb->pfMap = NULL;
      stencilRb->pfPitch = 0;
   }
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@ -282,7 +282,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
      DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width,
 	  height, pitch);

-      irb->region = intel_region_alloc(intel->intelScreen, cpp, pitch, height);
+      irb->region = intel_region_alloc(intel, cpp, pitch, height);
      if (!irb->region)
         return GL_FALSE;       /* out of memory? */

--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@ -101,7 +101,7 @@ intel_miptree_create(struct intel_context *intel,
      if (!mt->compressed) {
 	 int align;

-	 if (intel->intelScreen->ttm) {
+	 if (intel->ttm) {
 	    /* XXX: Align pitch to multiple of 64 bytes for now to allow
 	     * render-to-texture to work in all cases. This should probably be
 	     * replaced at some point by some scheme to only do this when really
@ -124,7 +124,7 @@ intel_miptree_create(struct intel_context *intel,
 	 mt->pitch /= cpp;
      }

-      mt->region = intel_region_alloc(intel->intelScreen,
+      mt->region = intel_region_alloc(intel,
                                      mt->cpp, mt->pitch, mt->total_height);
   }

@ -305,7 +305,7 @@ intel_miptree_image_map(struct intel_context * intel,
      memcpy(image_offsets, mt->level[level].image_offset,
             mt->level[level].depth * sizeof(GLuint));

-   return (intel_region_map(intel->intelScreen, mt->region) +
+   return (intel_region_map(intel, mt->region) +
           intel_miptree_image_offset(mt, face, level));
 }

@ -314,7 +314,7 @@ intel_miptree_image_unmap(struct intel_context *intel,
                          struct intel_mipmap_tree *mt)
 {
   DBG("%s\n", __FUNCTION__);
-   intel_region_unmap(intel->intelScreen, mt->region);
+   intel_region_unmap(intel, mt->region);
 }


@ -340,7 +340,7 @@ intel_miptree_image_data(struct intel_context *intel,
      height = dst->level[level].height;
      if(dst->compressed)
 	 height /= 4;
-      intel_region_data(intel->intelScreen, dst->region,
+      intel_region_data(intel, dst->region,
                        dst_offset + dst_depth_offset[i], /* dst_offset */
                        0, 0,                             /* dstx, dsty */
                        src,
@ -377,7 +377,7 @@ intel_miptree_image_copy(struct intel_context *intel,
   }

   for (i = 0; i < depth; i++) {
-      intel_region_copy(intel->intelScreen,
+      intel_region_copy(intel,
                        dst->region, dst_offset + dst_depth_offset[i],
                        0,
                        0,
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@ -50,7 +50,7 @@
 #define FILE_DEBUG_FLAG DEBUG_REGION

 void
-intel_region_idle(intelScreenPrivate *intelScreen, struct intel_region *region)
+intel_region_idle(struct intel_context *intel, struct intel_region *region)
 {
   DBG("%s\n", __FUNCTION__);
   /* XXX: Using this function is likely bogus -- it ought to only have been
@ -69,12 +69,12 @@ intel_region_idle(intelScreenPrivate *intelScreen, struct intel_region *region)
 /* XXX: Thread safety?
 */
 GLubyte *
-intel_region_map(intelScreenPrivate *intelScreen, struct intel_region *region)
+intel_region_map(struct intel_context *intel, struct intel_region *region)
 {
   DBG("%s\n", __FUNCTION__);
   if (!region->map_refcount++) {
      if (region->pbo)
-         intel_region_cow(intelScreen, region);
+         intel_region_cow(intel, region);

      dri_bo_map(region->buffer, GL_TRUE);
      region->map = region->buffer->virtual;
@ -84,7 +84,7 @@ intel_region_map(intelScreenPrivate *intelScreen, struct intel_region *region)
 }

 void
-intel_region_unmap(intelScreenPrivate *intelScreen, struct intel_region *region)
+intel_region_unmap(struct intel_context *intel, struct intel_region *region)
 {
   DBG("%s\n", __FUNCTION__);
   if (!--region->map_refcount) {
@ -94,7 +94,7 @@ intel_region_unmap(intelScreenPrivate *intelScreen, struct intel_region *region)
 }

 struct intel_region *
-intel_region_alloc(intelScreenPrivate *intelScreen,
+intel_region_alloc(struct intel_context *intel,
                   GLuint cpp, GLuint pitch, GLuint height)
 {
   struct intel_region *region = calloc(sizeof(*region), 1);
@ -106,7 +106,7 @@ intel_region_alloc(intelScreenPrivate *intelScreen,
   region->height = height;     /* needed? */
   region->refcount = 1;

-   region->buffer = dri_bo_alloc(intelScreen->bufmgr, "region",
+   region->buffer = dri_bo_alloc(intel->bufmgr, "region",
 				 pitch * cpp * height, 64, DRM_BO_FLAG_MEM_TT);
   return region;
 }
@ -144,84 +144,6 @@ intel_region_release(struct intel_region **region)
   *region = NULL;
 }

-
-struct intel_region *
-intel_region_create_static(intelScreenPrivate *intelScreen,
-			   const char *name,
-                           GLuint mem_type,
-			   unsigned int bo_handle,
-                           GLuint offset,
-                           void *virtual,
-                           GLuint cpp, GLuint pitch, GLuint height,
-			   GLboolean tiled)
-{
-   struct intel_region *region = calloc(sizeof(*region), 1);
-   DBG("%s\n", __FUNCTION__);
-
-   region->cpp = cpp;
-   region->pitch = pitch;
-   region->height = height;     /* needed? */
-   region->refcount = 1;
-   region->tiled = tiled;
-
-   if (intelScreen->ttm) {
-      assert(bo_handle != -1);
-      region->buffer = intel_ttm_bo_create_from_handle(intelScreen->bufmgr,
-						       name,
-						       bo_handle);
-   } else {
-      region->buffer = dri_bo_alloc_static(intelScreen->bufmgr,
-					   name,
-					   offset, pitch * cpp * height,
-					   virtual,
-					   DRM_BO_FLAG_MEM_TT);
-   }
-
-   return region;
-}
-
-
-
-void
-intel_region_update_static(intelScreenPrivate *intelScreen,
-			   struct intel_region *region,
-			   const char *name,
-                           GLuint mem_type,
-			   unsigned int bo_handle,
-                           GLuint offset,
-                           void *virtual,
-                           GLuint cpp, GLuint pitch, GLuint height,
-			   GLboolean tiled)
-{
-   DBG("%s\n", __FUNCTION__);
-
-   region->cpp = cpp;
-   region->pitch = pitch;
-   region->height = height;     /* needed? */
-   region->tiled = tiled;
-
-   /*
-    * We use a "shared" buffer type to indicate buffers created and
-    * shared by others.
-    */
-
-   dri_bo_unreference(region->buffer);
-   if (intelScreen->ttm) {
-      assert(bo_handle != -1);
-      region->buffer = intel_ttm_bo_create_from_handle(intelScreen->bufmgr,
-						       name,
-						       bo_handle);
-   } else {
-      region->buffer = dri_bo_alloc_static(intelScreen->bufmgr,
-					   name,
-					   offset, pitch * cpp * height,
-					   virtual,
-					   DRM_BO_FLAG_MEM_TT);
-   }
-}
-
-
-
 /*
 * XXX Move this into core Mesa?
 */
@ -266,15 +188,13 @@ _mesa_copy_rect(GLubyte * dst,
 * Currently always memcpy.
 */
 void
-intel_region_data(intelScreenPrivate *intelScreen,
+intel_region_data(struct intel_context *intel,
                  struct intel_region *dst,
                  GLuint dst_offset,
                  GLuint dstx, GLuint dsty,
                  const void *src, GLuint src_pitch,
                  GLuint srcx, GLuint srcy, GLuint width, GLuint height)
 {
-   struct intel_context *intel = intelScreenContext(intelScreen);
-
   DBG("%s\n", __FUNCTION__);

   if (intel == NULL)
@ -283,20 +203,20 @@ intel_region_data(intelScreenPrivate *intelScreen,
   if (dst->pbo) {
      if (dstx == 0 &&
          dsty == 0 && width == dst->pitch && height == dst->height)
-         intel_region_release_pbo(intelScreen, dst);
+         intel_region_release_pbo(intel, dst);
      else
-         intel_region_cow(intelScreen, dst);
+         intel_region_cow(intel, dst);
   }


   LOCK_HARDWARE(intel);

-   _mesa_copy_rect(intel_region_map(intelScreen, dst) + dst_offset,
+   _mesa_copy_rect(intel_region_map(intel, dst) + dst_offset,
                   dst->cpp,
                   dst->pitch,
                   dstx, dsty, width, height, src, src_pitch, srcx, srcy);

-   intel_region_unmap(intelScreen, dst);
+   intel_region_unmap(intel, dst);

   UNLOCK_HARDWARE(intel);

@ -306,7 +226,7 @@ intel_region_data(intelScreenPrivate *intelScreen,
 * push buffers into AGP - will currently do so whenever possible.
 */
 void
-intel_region_copy(intelScreenPrivate *intelScreen,
+intel_region_copy(struct intel_context *intel,
                  struct intel_region *dst,
                  GLuint dst_offset,
                  GLuint dstx, GLuint dsty,
@ -314,8 +234,6 @@ intel_region_copy(intelScreenPrivate *intelScreen,
                  GLuint src_offset,
                  GLuint srcx, GLuint srcy, GLuint width, GLuint height)
 {
-   struct intel_context *intel = intelScreenContext(intelScreen);
-
   DBG("%s\n", __FUNCTION__);

   if (intel == NULL)
@ -324,9 +242,9 @@ intel_region_copy(intelScreenPrivate *intelScreen,
   if (dst->pbo) {
      if (dstx == 0 &&
          dsty == 0 && width == dst->pitch && height == dst->height)
-         intel_region_release_pbo(intelScreen, dst);
+         intel_region_release_pbo(intel, dst);
      else
-         intel_region_cow(intelScreen, dst);
+         intel_region_cow(intel, dst);
   }

   assert(src->cpp == dst->cpp);
@ -343,14 +261,12 @@ intel_region_copy(intelScreenPrivate *intelScreen,
 * push buffers into AGP - will currently do so whenever possible.
 */
 void
-intel_region_fill(intelScreenPrivate *intelScreen,
+intel_region_fill(struct intel_context *intel,
                  struct intel_region *dst,
                  GLuint dst_offset,
                  GLuint dstx, GLuint dsty,
                  GLuint width, GLuint height, GLuint color)
 {
-   struct intel_context *intel = intelScreenContext(intelScreen);
-
   DBG("%s\n", __FUNCTION__);

   if (intel == NULL)
@ -359,9 +275,9 @@ intel_region_fill(intelScreenPrivate *intelScreen,
   if (dst->pbo) {
      if (dstx == 0 &&
          dsty == 0 && width == dst->pitch && height == dst->height)
-         intel_region_release_pbo(intelScreen, dst);
+         intel_region_release_pbo(intel, dst);
      else
-         intel_region_cow(intelScreen, dst);
+         intel_region_cow(intel, dst);
   }

   intelEmitFillBlit(intel,
@ -374,7 +290,7 @@ intel_region_fill(intelScreenPrivate *intelScreen,
 * the pbo's data.
 */
 void
-intel_region_attach_pbo(intelScreenPrivate *intelScreen,
+intel_region_attach_pbo(struct intel_context *intel,
                        struct intel_region *region,
                        struct intel_buffer_object *pbo)
 {
@ -407,7 +323,7 @@ intel_region_attach_pbo(intelScreenPrivate *intelScreen,
 * The pbo gets to keep the data.
 */
 void
-intel_region_release_pbo(intelScreenPrivate *intelScreen,
+intel_region_release_pbo(struct intel_context *intel,
                         struct intel_region *region)
 {
   assert(region->buffer == region->pbo->buffer);
@ -416,7 +332,7 @@ intel_region_release_pbo(intelScreenPrivate *intelScreen,
   dri_bo_unreference(region->buffer);
   region->buffer = NULL;

-   region->buffer = dri_bo_alloc(intelScreen->bufmgr, "region",
+   region->buffer = dri_bo_alloc(intel->bufmgr, "region",
 				 region->pitch * region->cpp * region->height,
 				 64, DRM_BO_FLAG_MEM_TT);
 }
@ -425,16 +341,15 @@ intel_region_release_pbo(intelScreenPrivate *intelScreen,
 * with a copy of the data.
 */
 void
-intel_region_cow(intelScreenPrivate *intelScreen, struct intel_region *region)
+intel_region_cow(struct intel_context *intel, struct intel_region *region)
 {
-   struct intel_context *intel = intelScreenContext(intelScreen);
   struct intel_buffer_object *pbo = region->pbo;
   GLboolean was_locked = intel->locked;

   if (intel == NULL)
      return;

-   intel_region_release_pbo(intelScreen, region);
+   intel_region_release_pbo(intel, region);

   assert(region->cpp * region->pitch * region->height == pbo->Base.Size);

@ -464,15 +379,100 @@ intel_region_cow(intelScreenPrivate *intelScreen, struct intel_region *region)
 }

 dri_bo *
-intel_region_buffer(intelScreenPrivate *intelScreen,
+intel_region_buffer(struct intel_context *intel,
                    struct intel_region *region, GLuint flag)
 {
   if (region->pbo) {
      if (flag == INTEL_WRITE_PART)
-         intel_region_cow(intelScreen, region);
+         intel_region_cow(intel, region);
      else if (flag == INTEL_WRITE_FULL)
-         intel_region_release_pbo(intelScreen, region);
+         intel_region_release_pbo(intel, region);
   }

   return region->buffer;
 }
+
+/**
+ * Create, or refresh in place, the intel_region describing one static buffer.
+ *
+ * \param intel        context providing the buffer manager, the ttm flag and
+ *                     (through intel->intelScreen) the cpp and height
+ * \param name         name handed to the buffer manager for the buffer object
+ * \param region       region to update, or NULL to allocate a new one
+ *                     (a new region starts with refcount 1)
+ * \param region_desc  screen-level description of the buffer
+ * \param mem_type     not used by this function
+ * \return the region, or NULL if a new region could not be allocated
+ */
+static struct intel_region *
+intel_recreate_static(struct intel_context *intel,
+		      const char *name,
+		      struct intel_region *region,
+		      intelRegion *region_desc,
+		      GLuint mem_type)
+{
+   intelScreenPrivate *intelScreen = intel->intelScreen;
+
+   if (region == NULL) {
+      region = calloc(1, sizeof(*region));
+      if (region == NULL)
+	 return NULL;
+      region->refcount = 1;
+   }
+
+   region->cpp = intelScreen->cpp;
+   region->pitch = region_desc->pitch / intelScreen->cpp;
+   region->height = intelScreen->height;     /* needed? */
+   region->tiled = region_desc->tiled;
+
+   /* When refreshing an existing region, drop its reference on the old
+    * buffer object before the pointer is overwritten below; otherwise that
+    * reference is leaked every time the static regions are recreated.
+    */
+   if (region->buffer != NULL) {
+      dri_bo_unreference(region->buffer);
+      region->buffer = NULL;
+   }
+
+   if (intel->ttm) {
+      assert(region_desc->bo_handle != -1);
+      region->buffer = intel_ttm_bo_create_from_handle(intel->bufmgr,
+						       name,
+						       region_desc->bo_handle);
+   } else {
+      region->buffer = dri_bo_alloc_static(intel->bufmgr,
+					   name,
+					   region_desc->offset,
+					   region_desc->pitch *
+					   intelScreen->height,
+					   region_desc->map,
+					   DRM_BO_FLAG_MEM_TT);
+   }
+
+   assert(region->buffer != NULL);
+
+   return region;
+}
+
+/**
+ * Build (or rebuild) the context's intel_region structs for the static
+ * front, back, optional third, and depth buffers created by the xserver.
+ *
+ * With FBOs these are no longer used as render targets in all
+ * circumstances, but they won't go away until the back and depth buffers
+ * become private, and the front buffer will remain even then.
+ *
+ * Nothing here allocates video memory: the regions only describe
+ * allocations already made by the X server.
+ */
+void
+intel_recreate_static_regions(struct intel_context *intel)
+{
+   intelScreenPrivate *screen = intel->intelScreen;
+   const GLuint mem_type = DRM_BO_FLAG_MEM_TT;
+
+   intel->front_region = intel_recreate_static(intel, "front",
+					       intel->front_region,
+					       &screen->front, mem_type);
+
+   intel->back_region = intel_recreate_static(intel, "back",
+					      intel->back_region,
+					      &screen->back, mem_type);
+
+   /* The third buffer is only described when the screen reports a handle
+    * for it.
+    */
+   if (screen->third.handle) {
+      intel->third_region = intel_recreate_static(intel, "third",
+						  intel->third_region,
+						  &screen->third, mem_type);
+   }
+
+   /* Still assumes front.cpp == depth.cpp.  We can kill this when we move
+    * to private buffers.
+    */
+   intel->depth_region = intel_recreate_static(intel, "depth",
+					       intel->depth_region,
+					       &screen->depth, mem_type);
+}
--- a/src/mesa/drivers/dri/intel/intel_regions.h
+++ b/src/mesa/drivers/dri/intel/intel_regions.h
@ -29,7 +29,7 @@
 #define INTEL_REGIONS_H

 #include "mtypes.h"
-#include "intel_screen.h"
+#include "dri_bufmgr.h"

 struct intel_context;
 struct intel_buffer_object;
@ -62,7 +62,7 @@ struct intel_region
 /* Allocate a refcounted region.  Pointers to regions should only be
 * copied by calling intel_reference_region().
 */
-struct intel_region *intel_region_alloc(intelScreenPrivate *intelScreen,
+struct intel_region *intel_region_alloc(struct intel_context *intel,
                                        GLuint cpp,
                                        GLuint pitch, GLuint height);

@ -71,41 +71,22 @@ void intel_region_reference(struct intel_region **dst,

 void intel_region_release(struct intel_region **ib);

-extern struct intel_region 
-*intel_region_create_static(intelScreenPrivate *intelScreen,
-			    const char *name,
-			    GLuint mem_type,
-			    unsigned int bo_handle,
-			    GLuint offset,
-			    void *virtual,
-			    GLuint cpp,
-			    GLuint pitch, GLuint height, GLboolean tiled);
-extern void 
-intel_region_update_static(intelScreenPrivate *intelScreen,
-			   struct intel_region *region,
-			   const char *name,
-			   GLuint mem_type,
-			   unsigned int bo_handle,
-			   GLuint offset,
-			   void *virtual,
-			   GLuint cpp, GLuint pitch, GLuint height,
-			   GLboolean tiled);
+void intel_recreate_static_regions(struct intel_context *intel);

-
-void intel_region_idle(intelScreenPrivate *intelScreen,
+void intel_region_idle(struct intel_context *intel,
 		       struct intel_region *ib);

 /* Map/unmap regions.  This is refcounted also: 
 */
-GLubyte *intel_region_map(intelScreenPrivate *intelScreen,
+GLubyte *intel_region_map(struct intel_context *intel,
                          struct intel_region *ib);

-void intel_region_unmap(intelScreenPrivate *intelScreen, struct intel_region *ib);
+void intel_region_unmap(struct intel_context *intel, struct intel_region *ib);


 /* Upload data to a rectangular sub-region
 */
-void intel_region_data(intelScreenPrivate *intelScreen,
+void intel_region_data(struct intel_context *intel,
                       struct intel_region *dest,
                       GLuint dest_offset,
                       GLuint destx, GLuint desty,
@ -114,7 +95,7 @@ void intel_region_data(intelScreenPrivate *intelScreen,

 /* Copy rectangular sub-regions
 */
-void intel_region_copy(intelScreenPrivate *intelScreen,
+void intel_region_copy(struct intel_context *intel,
                       struct intel_region *dest,
                       GLuint dest_offset,
                       GLuint destx, GLuint desty,
@ -124,7 +105,7 @@ void intel_region_copy(intelScreenPrivate *intelScreen,

 /* Fill a rectangular sub-region
 */
-void intel_region_fill(intelScreenPrivate *intelScreen,
+void intel_region_fill(struct intel_context *intel,
                       struct intel_region *dest,
                       GLuint dest_offset,
                       GLuint destx, GLuint desty,
@ -132,15 +113,15 @@ void intel_region_fill(intelScreenPrivate *intelScreen,

 /* Helpers for zerocopy uploads, particularly texture image uploads:
 */
-void intel_region_attach_pbo(intelScreenPrivate *intelScreen,
+void intel_region_attach_pbo(struct intel_context *intel,
                             struct intel_region *region,
                             struct intel_buffer_object *pbo);
-void intel_region_release_pbo(intelScreenPrivate *intelScreen,
+void intel_region_release_pbo(struct intel_context *intel,
                              struct intel_region *region);
-void intel_region_cow(intelScreenPrivate *intelScreen,
+void intel_region_cow(struct intel_context *intel,
                      struct intel_region *region);

-dri_bo *intel_region_buffer(intelScreenPrivate *intelScreen,
+dri_bo *intel_region_buffer(struct intel_context *intel,
 			    struct intel_region *region,
 			    GLuint flag);

--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@ -46,12 +46,9 @@
 #include "intel_fbo.h"

 #include "i830_dri.h"
-#include "dri_bufmgr.h"
 #include "intel_regions.h"
 #include "intel_batchbuffer.h"

-#include "intel_bufmgr_ttm.h"
-
 PUBLIC const char __driConfigOptions[] =
   DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE
   DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
@ -143,105 +140,6 @@ intelMapScreenRegions(__DRIscreenPrivate * sPriv)
   return GL_TRUE;
 }

-/** Driver-specific fence emit implementation for the fake memory manager. */
-static unsigned int
-intel_fence_emit(void *private)
-{
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *)private;
-   unsigned int fence;
-
-   /* XXX: Need to emit a flush, if we haven't already (at least with the
-    * current batchbuffer implementation, we have).
-    */
-
-   fence = intelEmitIrqLocked(intelScreen);
-
-   return fence;
-}
-
-/** Driver-specific fence wait implementation for the fake memory manager. */
-static int
-intel_fence_wait(void *private, unsigned int cookie)
-{
-   intelScreenPrivate *intelScreen = (intelScreenPrivate *)private;
-
-   intelWaitIrq(intelScreen, cookie);
-
-   return 0;
-}
-
-static struct intel_region *
-intel_recreate_static(intelScreenPrivate *intelScreen,
-		      const char *name,
-		      struct intel_region *region,
-		      intelRegion *region_desc,
-		      GLuint mem_type)
-{
-  if (region) {
-    intel_region_update_static(intelScreen, region, name, mem_type,
-			       region_desc->bo_handle, region_desc->offset,
-			       region_desc->map, intelScreen->cpp,
-			       region_desc->pitch / intelScreen->cpp,
-			       intelScreen->height, region_desc->tiled);
-  } else {
-    region = intel_region_create_static(intelScreen, name, mem_type,
-					region_desc->bo_handle,
-					region_desc->offset,
-					region_desc->map, intelScreen->cpp,
-					region_desc->pitch / intelScreen->cpp,
-					intelScreen->height,
-					region_desc->tiled);
-  }
-
-  assert(region->buffer != NULL);
-
-  return region;
-}
-    
-
-/* Create intel_region structs to describe the static front,back,depth
- * buffers created by the xserver. 
- *
- * Although FBO's mean we now no longer use these as render targets in
- * all circumstances, they won't go away until the back and depth
- * buffers become private, and the front buffer will remain even then.
- *
- * Note that these don't allocate video memory, just describe
- * allocations alread made by the X server.
- */
-static void
-intel_recreate_static_regions(intelScreenPrivate *intelScreen)
-{
-   intelScreen->front_region =
-      intel_recreate_static(intelScreen, "front",
-			    intelScreen->front_region,
-			    &intelScreen->front,
-			    DRM_BO_FLAG_MEM_TT);
-
-   intelScreen->back_region =
-      intel_recreate_static(intelScreen, "back",
-			    intelScreen->back_region,
-			    &intelScreen->back,
-			    DRM_BO_FLAG_MEM_TT);
-
-   if (intelScreen->third.handle) {
-      intelScreen->third_region =
-	 intel_recreate_static(intelScreen, "third",
-			       intelScreen->third_region,
-			       &intelScreen->third,
-			       DRM_BO_FLAG_MEM_TT);
-   }
-
-   /* Still assumes front.cpp == depth.cpp.  We can kill this when we move to
-    * private buffers.
-    */
-   intelScreen->depth_region =
-      intel_recreate_static(intelScreen, "depth",
-			    intelScreen->depth_region,
-			    &intelScreen->depth,
-			    DRM_BO_FLAG_MEM_TT);
-}
-
 void
 intelUnmapScreenRegions(intelScreenPrivate * intelScreen)
 {
@ -426,10 +324,6 @@ static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv)
      (((GLubyte *) sPriv->pSAREA) + intelScreen->sarea_priv_offset);

   intelScreen->deviceID = gDRIPriv->deviceID;
-   if (intelScreen->deviceID == PCI_CHIP_I865_G)
-      intelScreen->maxBatchSize = 4096;
-   else
-      intelScreen->maxBatchSize = BATCH_SZ;

   intelScreen->mem = gDRIPriv->mem;
   intelScreen->cpp = gDRIPriv->cpp;
@ -496,41 +390,6 @@ static GLboolean intelInitDriver(__DRIscreenPrivate *sPriv)

   sPriv->extensions = intelExtensions;

-   /* If we've got a new enough DDX that's initializing TTM and giving us
-    * object handles for the shared buffers, use that.
-    */
-   intelScreen->ttm = GL_FALSE;
-   if (getenv("INTEL_NO_TTM") == NULL &&
-       intelScreen->driScrnPriv->ddx_version.minor >= 9 &&
-       intelScreen->drmMinor >= 11 &&
-       intelScreen->front.bo_handle != -1) {
-      intelScreen->bufmgr = intel_bufmgr_ttm_init(sPriv->fd,
-						  DRM_FENCE_TYPE_EXE,
-						  DRM_FENCE_TYPE_EXE |
-						  DRM_I915_FENCE_TYPE_RW,
-						  BATCH_SZ);
-      if (intelScreen->bufmgr != NULL)
-	 intelScreen->ttm = GL_TRUE;
-   }
-   /* Otherwise, use the classic buffer manager. */
-   if (intelScreen->bufmgr == NULL) {
-      if (intelScreen->tex.size == 0) {
-	 fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n",
-		 __func__, __LINE__);
-	 return GL_FALSE;
-      }
-      fprintf(stderr, "[%s:%u] Failed to init TTM buffer manager, falling back"
-	      " to classic.\n", __func__, __LINE__);
-      intelScreen->bufmgr = dri_bufmgr_fake_init(intelScreen->tex.offset,
-						 intelScreen->tex.map,
-						 intelScreen->tex.size,
-						 intel_fence_emit,
-						 intel_fence_wait,
-						 intelScreen);
-   }
-
-   intel_recreate_static_regions(intelScreen);
-
   return GL_TRUE;
 }

@ -542,7 +401,6 @@ intelDestroyScreen(__DRIscreenPrivate * sPriv)

   intelUnmapScreenRegions(intelScreen);

-   dri_bufmgr_destroy(intelScreen->bufmgr);
   FREE(intelScreen);
   sPriv->private = NULL;
 }
--- a/src/mesa/drivers/dri/intel/intel_screen.h
+++ b/src/mesa/drivers/dri/intel/intel_screen.h
@ -32,7 +32,6 @@
 #include "dri_util.h"
 #include "i830_common.h"
 #include "xmlconfig.h"
-#include "dri_bufmgr.h"

 /* XXX: change name or eliminate to avoid conflict with "struct
 * intel_region"!!!
@ -61,11 +60,6 @@ typedef struct
   intelRegion depth;
   intelRegion tex;

-   struct intel_region *front_region;
-   struct intel_region *back_region;
-   struct intel_region *third_region;
-   struct intel_region *depth_region;
-
   int deviceID;
   int width;
   int height;
@ -89,17 +83,6 @@ typedef struct
   * Configuration cache with default values for all contexts
   */
   driOptionCache optionCache;
-
-   dri_bufmgr *bufmgr;
-   unsigned int maxBatchSize;
-
-   /**
-    * This value indicates that the kernel memory manager is being used
-    * instead of the fake client-side memory manager.
-    */
-   GLboolean ttm;
-
-   unsigned batch_id;
 } intelScreenPrivate;


--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@ -195,9 +195,9 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
            /* this is a user-created intel_renderbuffer */
            if (irb->region) {
               if (map)
-                  intel_region_map(intel->intelScreen, irb->region);
+                  intel_region_map(intel, irb->region);
               else
-                  intel_region_unmap(intel->intelScreen, irb->region);
+                  intel_region_unmap(intel, irb->region);
               irb->pfMap = irb->region->map;
               irb->pfPitch = irb->region->pitch;
            }
@ -228,9 +228,9 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
   irb = intel_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
   if (irb && irb->region) {
      if (map)
-         intel_region_map(intel->intelScreen, irb->region);
+         intel_region_map(intel, irb->region);
      else
-         intel_region_unmap(intel->intelScreen, irb->region);
+         intel_region_unmap(intel, irb->region);
      irb->pfMap = irb->region->map;
      irb->pfPitch = irb->region->pitch;
   }
@ -269,12 +269,12 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
      irb = intel_renderbuffer(ctx->DrawBuffer->_DepthBuffer->Wrapped);
      if (irb && irb->region && irb->Base.Name != 0) {
         if (map) {
-            intel_region_map(intel->intelScreen, irb->region);
+            intel_region_map(intel, irb->region);
            irb->pfMap = irb->region->map;
            irb->pfPitch = irb->region->pitch;
         }
         else {
-            intel_region_unmap(intel->intelScreen, irb->region);
+            intel_region_unmap(intel, irb->region);
            irb->pfMap = NULL;
            irb->pfPitch = 0;
         }
@ -286,12 +286,12 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
      irb = intel_renderbuffer(ctx->DrawBuffer->_StencilBuffer->Wrapped);
      if (irb && irb->region && irb->Base.Name != 0) {
         if (map) {
-            intel_region_map(intel->intelScreen, irb->region);
+            intel_region_map(intel, irb->region);
            irb->pfMap = irb->region->map;
            irb->pfPitch = irb->region->pitch;
         }
         else {
-            intel_region_unmap(intel->intelScreen, irb->region);
+            intel_region_unmap(intel, irb->region);
            irb->pfMap = NULL;
            irb->pfPitch = 0;
         }
@ -320,9 +320,9 @@ intelSpanRenderStart(GLcontext * ctx)
   /* Just map the framebuffer and all textures.  Bufmgr code will
    * take care of waiting on the necessary fences:
    */
-   intel_region_map(intel->intelScreen, intel->front_region);
-   intel_region_map(intel->intelScreen, intel->back_region);
-   intel_region_map(intel->intelScreen, intel->intelScreen->depth_region);
+   intel_region_map(intel, intel->front_region);
+   intel_region_map(intel, intel->back_region);
+   intel_region_map(intel, intel->depth_region);
 #endif

   for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
@ -352,7 +352,7 @@ intelSpanRenderFinish(GLcontext * ctx)
 #if 0
   intel_region_unmap(intel, intel->front_region);
   intel_region_unmap(intel, intel->back_region);
-   intel_region_unmap(intel, intel->intelScreen->depth_region);
+   intel_region_unmap(intel, intel->depth_region);
 #endif

   for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
--- a/src/mesa/drivers/dri/intel/intel_tex_format.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_format.c
@ -140,6 +140,27 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
   case GL_DEPTH24_STENCIL8_EXT:
      return &_mesa_texformat_z24_s8;

+#ifndef I915
+   case GL_SRGB_EXT:
+   case GL_SRGB8_EXT:
+   case GL_SRGB_ALPHA_EXT:
+   case GL_SRGB8_ALPHA8_EXT:
+   case GL_SLUMINANCE_EXT:
+   case GL_SLUMINANCE8_EXT:
+   case GL_SLUMINANCE_ALPHA_EXT:
+   case GL_SLUMINANCE8_ALPHA8_EXT:
+   case GL_COMPRESSED_SRGB_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_EXT:
+   case GL_COMPRESSED_SLUMINANCE_EXT:
+   case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
+       return &_mesa_texformat_srgba8;
+   case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
+     return &_mesa_texformat_srgb_dxt1;
+#endif
+
   default:
      fprintf(stderr, "unexpected texture format %s in %s\n",
              _mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__);
--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@ -222,7 +222,7 @@ try_pbo_upload(struct intel_context *intel,
   LOCK_HARDWARE(intel);
   {
      dri_bo *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ);
-      dri_bo *dst_buffer = intel_region_buffer(intel->intelScreen,
+      dri_bo *dst_buffer = intel_region_buffer(intel,
 					       intelImage->mt->region,
 					       INTEL_WRITE_FULL);

@ -280,7 +280,7 @@ try_pbo_zcopy(struct intel_context *intel,
      return GL_FALSE;
   }

-   intel_region_attach_pbo(intel->intelScreen, intelImage->mt->region, pbo);
+   intel_region_attach_pbo(intel, intelImage->mt->region, pbo);

   return GL_TRUE;
 }
@ -459,7 +459,7 @@ intelTexImage(GLcontext * ctx,


   if (intelImage->mt)
-      intel_region_idle(intel->intelScreen, intelImage->mt->region);
+      intel_region_idle(intel, intelImage->mt->region);

   LOCK_HARDWARE(intel);

--- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
@ -65,7 +65,7 @@ intelTexSubimage(GLcontext * ctx,
      return;

   if (intelImage->mt)
-      intel_region_idle(intel->intelScreen, intelImage->mt->region);
+      intel_region_idle(intel, intelImage->mt->region);

   LOCK_HARDWARE(intel);

--- a/src/mesa/shader/arbprogparse.c
+++ b/src/mesa/shader/arbprogparse.c
@ -1768,7 +1768,7 @@ parse_param_elements (GLcontext * ctx, const GLubyte ** inst,
 {
   GLint idx;
   GLuint err = 0;
-   gl_state_index state_tokens[STATE_LENGTH];
+   gl_state_index state_tokens[STATE_LENGTH] = {0, 0, 0, 0, 0};
   GLfloat const_values[4];

   switch (*(*inst)++) {
--- a/src/mesa/shader/prog_parameter.c
+++ b/src/mesa/shader/prog_parameter.c
@ -384,7 +384,7 @@ sizeof_state_reference(const GLint *stateTokens)
 *    PARAM ambient = state.material.front.ambient;
 *
 * \param paramList  the parameter list
- * \param state  an array of 6 (STATE_LENGTH) state tokens
+ * \param stateTokens  an array of 5 (STATE_LENGTH) state tokens
 * \return index of the new parameter.
 */
 GLint