Merge branch 'drm-gem'

Conflicts: src/mesa/drivers/dri/intel/intel_span.c, src/mesa/main/fbobject.c. This merge converts the i915 driver to use the GEM interfaces for object management.
2026-02-18 21:20:29 +01:00 · 2008-08-08 15:32:24 -07:00 · 2008-08-08 15:32:24 -07:00 · 53675e5c05
commit 53675e5c05
parent 501338d70e d2796939f1
76 changed files with 1756 additions and 4320 deletions
--- a/src/mesa/drivers/dri/Makefile.template
+++ b/src/mesa/drivers/dri/Makefile.template
@ -10,11 +10,6 @@ COMMON_SOURCES = \
        ../common/xmlconfig.c \
        ../common/drirenderbuffer.c 

-COMMON_BM_SOURCES = \
-	../common/dri_bufmgr.c \
-	../common/dri_bufmgr_fake.c
-
-
 ifeq ($(WINDOW_SYSTEM),dri)
 WINOBJ=
 WINLIB=
--- a/src/mesa/drivers/dri/common/dri_bufmgr.c
+++ b/src/mesa/drivers/dri/common/dri_bufmgr.c
@ -1,160 +0,0 @@
-/*
- * Copyright © 2007 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *
- */
-
-#include <string.h>
-#include <stdlib.h>
-#include <assert.h>
-#include "mtypes.h"
-#include "dri_bufmgr.h"
-
-/** @file dri_bufmgr.c
- *
- * Convenience functions for buffer management methods.
- */
-
-dri_bo *
-dri_bo_alloc(dri_bufmgr *bufmgr, const char *name, unsigned long size,
-	     unsigned int alignment, uint64_t location_mask)
-{
-   return bufmgr->bo_alloc(bufmgr, name, size, alignment, location_mask);
-}
-
-dri_bo *
-dri_bo_alloc_static(dri_bufmgr *bufmgr, const char *name, unsigned long offset,
-		    unsigned long size, void *virtual,
-		    uint64_t location_mask)
-{
-   return bufmgr->bo_alloc_static(bufmgr, name, offset, size, virtual,
-				  location_mask);
-}
-
-void
-dri_bo_reference(dri_bo *bo)
-{
-   bo->bufmgr->bo_reference(bo);
-}
-
-void
-dri_bo_unreference(dri_bo *bo)
-{
-   if (bo == NULL)
-      return;
-
-   bo->bufmgr->bo_unreference(bo);
-}
-
-int
-dri_bo_map(dri_bo *buf, GLboolean write_enable)
-{
-   return buf->bufmgr->bo_map(buf, write_enable);
-}
-
-int
-dri_bo_unmap(dri_bo *buf)
-{
-   return buf->bufmgr->bo_unmap(buf);
-}
-
-void
-dri_fence_wait(dri_fence *fence)
-{
-   fence->bufmgr->fence_wait(fence);
-}
-
-void
-dri_fence_reference(dri_fence *fence)
-{
-   fence->bufmgr->fence_reference(fence);
-}
-
-void
-dri_fence_unreference(dri_fence *fence)
-{
-   if (fence == NULL)
-      return;
-
-   fence->bufmgr->fence_unreference(fence);
-}
-
-void
-dri_bo_subdata(dri_bo *bo, unsigned long offset,
-	       unsigned long size, const void *data)
-{
-   if (size == 0 || data == NULL)
-      return;
-
-   dri_bo_map(bo, GL_TRUE);
-   memcpy((unsigned char *)bo->virtual + offset, data, size);
-   dri_bo_unmap(bo);
-}
-
-void
-dri_bo_get_subdata(dri_bo *bo, unsigned long offset,
-		   unsigned long size, void *data)
-{
-   if (size == 0 || data == NULL)
-      return;
-
-   dri_bo_map(bo, GL_FALSE);
-   memcpy(data, (unsigned char *)bo->virtual + offset, size);
-   dri_bo_unmap(bo);
-}
-
-void
-dri_bufmgr_destroy(dri_bufmgr *bufmgr)
-{
-   bufmgr->destroy(bufmgr);
-}
-
-
-int dri_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta,
-		    GLuint offset, dri_bo *target_buf)
-{
-   return reloc_buf->bufmgr->emit_reloc(reloc_buf, flags, delta, offset, target_buf);
-}
-
-void *dri_process_relocs(dri_bo *batch_buf, GLuint *count)
-{
-   return batch_buf->bufmgr->process_relocs(batch_buf, count);
-}
-
-void dri_post_submit(dri_bo *batch_buf, dri_fence **last_fence)
-{
-   batch_buf->bufmgr->post_submit(batch_buf, last_fence);
-}
-
-void
-dri_bufmgr_set_debug(dri_bufmgr *bufmgr, GLboolean enable_debug)
-{
-   bufmgr->debug = enable_debug;
-}
-
-int
-dri_bufmgr_check_aperture_space(dri_bo *bo)
-{
-    return bo->bufmgr->check_aperture_space(bo);
-}
--- a/src/mesa/drivers/dri/common/dri_bufmgr.h
+++ b/src/mesa/drivers/dri/common/dri_bufmgr.h
@ -1,260 +0,0 @@
-/**************************************************************************
- * 
- * Copyright © 2007 Intel Corporation
- * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * 
- **************************************************************************/
-/*
- * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
- *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
- *	    Eric Anholt <eric@anholt.net>
- */
-
-#ifndef _DRI_BUFMGR_H_
-#define _DRI_BUFMGR_H_
-#include <xf86drm.h>
-
-typedef struct _dri_bufmgr dri_bufmgr;
-typedef struct _dri_bo dri_bo;
-typedef struct _dri_fence dri_fence;
-
-struct _dri_bo {
-   /** Size in bytes of the buffer object. */
-   unsigned long size;
-   /**
-    * Card virtual address (offset from the beginning of the aperture) for the
-    * object.  Only valid while validated.
-    */
-   unsigned long offset;
-   /**
-    * Virtual address for accessing the buffer data.  Only valid while mapped.
-    */
-   void *virtual;
-   /** Buffer manager context associated with this buffer object */
-   dri_bufmgr *bufmgr;
-};
-
-struct _dri_fence {
-   /**
-    * This is an ORed mask of DRM_BO_FLAG_READ, DRM_BO_FLAG_WRITE, and
-    * DRM_FLAG_EXE indicating the operations associated with this fence.
-    *
-    * It is constant for the life of the fence object.
-    */
-   unsigned int type;
-   /** Buffer manager context associated with this fence */
-   dri_bufmgr *bufmgr;
-};
-
-/**
- * Context for a buffer manager instance.
- *
- * Contains public methods followed by private storage for the buffer manager.
- */
-struct _dri_bufmgr {
-   /**
-    * Allocate a buffer object.
-    *
-    * Buffer objects are not necessarily initially mapped into CPU virtual
-    * address space or graphics device aperture.  They must be mapped using
-    * bo_map() to be used by the CPU, and validated for use using bo_validate()
-    * to be used from the graphics device.
-    */
-   dri_bo *(*bo_alloc)(dri_bufmgr *bufmgr_ctx, const char *name,
-		       unsigned long size, unsigned int alignment,
-		       uint64_t location_mask);
-
-   /**
-    * Allocates a buffer object for a static allocation.
-    *
-    * Static allocations are ones such as the front buffer that are offered by
-    * the X Server, which are never evicted and never moved.
-    */
-   dri_bo *(*bo_alloc_static)(dri_bufmgr *bufmgr_ctx, const char *name,
-			      unsigned long offset, unsigned long size,
-			      void *virtual, uint64_t location_mask);
-
-   /** Takes a reference on a buffer object */
-   void (*bo_reference)(dri_bo *bo);
-
-   /**
-    * Releases a reference on a buffer object, freeing the data if
-    * rerefences remain.
-    */
-   void (*bo_unreference)(dri_bo *bo);
-
-   /**
-    * Maps the buffer into userspace.
-    *
-    * This function will block waiting for any existing fence on the buffer to
-    * clear, first.  The resulting mapping is available at buf->virtual.
-\    */
-   int (*bo_map)(dri_bo *buf, GLboolean write_enable);
-
-   /** Reduces the refcount on the userspace mapping of the buffer object. */
-   int (*bo_unmap)(dri_bo *buf);
-
-   /** Takes a reference on a fence object */
-   void (*fence_reference)(dri_fence *fence);
-
-   /**
-    * Releases a reference on a fence object, freeing the data if
-    * rerefences remain.
-    */
-   void (*fence_unreference)(dri_fence *fence);
-
-   /**
-    * Blocks until the given fence is signaled.
-    */
-   void (*fence_wait)(dri_fence *fence);
-
-   /**
-    * Tears down the buffer manager instance.
-    */
-   void (*destroy)(dri_bufmgr *bufmgr);
-
-   /**
-    * Add relocation entry in reloc_buf, which will be updated with the
-    * target buffer's real offset on on command submission.
-    *
-    * Relocations remain in place for the lifetime of the buffer object.
-    *
-    * \param reloc_buf Buffer to write the relocation into.
-    * \param flags BO flags to be used in validating the target buffer.
-    *	     Applicable flags include:
-    *	     - DRM_BO_FLAG_READ: The buffer will be read in the process of
-    *	       command execution.
-    *	     - DRM_BO_FLAG_WRITE: The buffer will be written in the process of
-    *	       command execution.
-    *	     - DRM_BO_FLAG_MEM_TT: The buffer should be validated in TT memory.
-    *	     - DRM_BO_FLAG_MEM_VRAM: The buffer should be validated in video
-    *	       memory.
-    * \param delta Constant value to be added to the relocation target's offset.
-    * \param offset Byte offset within batch_buf of the relocated pointer.
-    * \param target Buffer whose offset should be written into the relocation
-    *	     entry.
-    */
-   int (*emit_reloc)(dri_bo *reloc_buf, uint64_t flags, GLuint delta,
-		      GLuint offset, dri_bo *target);
-
-   /**
-    * Processes the relocations, either in userland or by converting the list
-    * for use in batchbuffer submission.
-    *
-    * Kernel-based implementations will return a pointer to the arguments
-    * to be handed with batchbuffer submission to the kernel.  The userland
-    * implementation performs the buffer validation and emits relocations
-    * into them the appopriate order.
-    *
-    * \param batch_buf buffer at the root of the tree of relocations
-    * \param count returns the number of buffers validated.
-    * \return relocation record for use in command submission.
-    * */
-   void *(*process_relocs)(dri_bo *batch_buf, GLuint *count);
-
-   void (*post_submit)(dri_bo *batch_buf, dri_fence **fence);
-
-   int (*check_aperture_space)(dri_bo *bo);
-   GLboolean debug; /**< Enables verbose debugging printouts */
-};
-
-dri_bo *dri_bo_alloc(dri_bufmgr *bufmgr, const char *name, unsigned long size,
-		     unsigned int alignment, uint64_t location_mask);
-dri_bo *dri_bo_alloc_static(dri_bufmgr *bufmgr, const char *name,
-			    unsigned long offset, unsigned long size,
-			    void *virtual, uint64_t location_mask);
-void dri_bo_reference(dri_bo *bo);
-void dri_bo_unreference(dri_bo *bo);
-int dri_bo_map(dri_bo *buf, GLboolean write_enable);
-int dri_bo_unmap(dri_bo *buf);
-void dri_fence_wait(dri_fence *fence);
-void dri_fence_reference(dri_fence *fence);
-void dri_fence_unreference(dri_fence *fence);
-
-void dri_bo_subdata(dri_bo *bo, unsigned long offset,
-		    unsigned long size, const void *data);
-void dri_bo_get_subdata(dri_bo *bo, unsigned long offset,
-			unsigned long size, void *data);
-
-void dri_bufmgr_fake_contended_lock_take(dri_bufmgr *bufmgr);
-dri_bufmgr *dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual,
-				 unsigned long size,
-				 unsigned int (*fence_emit)(void *private),
-				 int (*fence_wait)(void *private,
-						   unsigned int cookie),
-				 void *driver_priv);
-void dri_bufmgr_set_debug(dri_bufmgr *bufmgr, GLboolean enable_debug);
-void dri_bo_fake_disable_backing_store(dri_bo *bo,
-				       void (*invalidate_cb)(dri_bo *bo,
-							     void *ptr),
-				       void *ptr);
-void dri_bufmgr_destroy(dri_bufmgr *bufmgr);
-
-int dri_emit_reloc(dri_bo *reloc_buf, uint64_t flags, GLuint delta,
-		   GLuint offset, dri_bo *target_buf);
-void *dri_process_relocs(dri_bo *batch_buf, uint32_t *count);
-void dri_post_process_relocs(dri_bo *batch_buf);
-void dri_post_submit(dri_bo *batch_buf, dri_fence **last_fence);
-int dri_bufmgr_check_aperture_space(dri_bo *bo);
-
-#ifndef TTM_API
-/* reuse some TTM API */
-
-#define DRM_BO_MEM_LOCAL 0
-#define DRM_BO_MEM_TT 1
-#define DRM_BO_MEM_VRAM 2
-#define DRM_BO_MEM_PRIV0 3
-#define DRM_BO_MEM_PRIV1 4
-#define DRM_BO_MEM_PRIV2 5
-#define DRM_BO_MEM_PRIV3 6
-#define DRM_BO_MEM_PRIV4 7
-
-#define DRM_BO_FLAG_READ        (1ULL << 0)
-#define DRM_BO_FLAG_WRITE       (1ULL << 1)
-#define DRM_BO_FLAG_EXE         (1ULL << 2)
-#define DRM_BO_MASK_ACCESS	(DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE | DRM_BO_FLAG_EXE)
-#define DRM_BO_FLAG_NO_EVICT    (1ULL << 4)
-
-#define DRM_BO_FLAG_MAPPABLE    (1ULL << 5)
-#define DRM_BO_FLAG_SHAREABLE   (1ULL << 6)
-
-#define DRM_BO_FLAG_CACHED      (1ULL << 7)
-
-#define DRM_BO_FLAG_NO_MOVE     (1ULL << 8)
-#define DRM_BO_FLAG_CACHED_MAPPED    (1ULL << 19)
-#define DRM_BO_FLAG_FORCE_CACHING  (1ULL << 13)
-#define DRM_BO_FLAG_FORCE_MAPPABLE (1ULL << 14)
-#define DRM_BO_FLAG_TILE           (1ULL << 15)
-
-#define DRM_BO_FLAG_MEM_LOCAL  (1ULL << 24)
-#define DRM_BO_FLAG_MEM_TT     (1ULL << 25)
-#define DRM_BO_FLAG_MEM_VRAM   (1ULL << 26)
-
-#define DRM_BO_MASK_MEM         0x00000000FF000000ULL
-
-#define DRM_FENCE_TYPE_EXE                 0x00000001
-#endif
-
-#endif
--- a/src/mesa/drivers/dri/common/dri_bufmgr_fake.c
+++ b/src/mesa/drivers/dri/common/dri_bufmgr_fake.c
--- a/src/mesa/drivers/dri/common/spantmp2.h
+++ b/src/mesa/drivers/dri/common/spantmp2.h
@ -48,36 +48,34 @@
 #define HW_WRITE_CLIPLOOP()	HW_CLIPLOOP()
 #endif

-
 #if (SPANTMP_PIXEL_FMT == GL_RGB)  && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)

 /**
 ** GL_RGB, GL_UNSIGNED_SHORT_5_6_5
 **/

+#ifndef GET_VALUE
 #ifndef GET_PTR
 #define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
 #endif

+#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
+#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
+#endif /* GET_VALUE */
+
 #define INIT_MONO_PIXEL(p, color) \
  p = PACK_COLOR_565( color[0], color[1], color[2] )

 #define WRITE_RGBA( _x, _y, r, g, b, a )				\
-    do {                                                                \
-       GLshort * _p = (GLshort *) GET_PTR(_x, _y);                      \
-       _p[0] = ((((int)r & 0xf8) << 8) | (((int)g & 0xfc) << 3) |	\
-		   (((int)b & 0xf8) >> 3));                             \
-   } while(0)
+   PUT_VALUE(_x, _y, ((((int)r & 0xf8) << 8) |				\
+		      (((int)g & 0xfc) << 3) |				\
+		      (((int)b & 0xf8) >> 3)))				\

-#define WRITE_PIXEL( _x, _y, p )					\
-   do {                                                                 \
-      GLushort * _p = (GLushort *) GET_PTR(_x, _y);                     \
-      _p[0] = p;                                                        \
-   } while(0)
+#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)

 #define READ_RGBA( rgba, _x, _y )					\
   do {									\
-      GLushort p = *(volatile GLshort *) GET_PTR(_x, _y);               \
+      GLushort p = GET_VALUE(_x, _y);					\
      rgba[0] = ((p >> 8) & 0xf8) * 255 / 0xf8;				\
      rgba[1] = ((p >> 3) & 0xfc) * 255 / 0xfc;				\
      rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8;				\
@ -90,29 +88,30 @@
 ** GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV
 **/

+#ifndef GET_VALUE
 #ifndef GET_PTR
 #define GET_PTR(_x, _y) (     buf + (_x) * 4 + (_y) * pitch)
 #endif

+#define GET_VALUE(_x, _y) *(volatile GLuint *)(GET_PTR(_x, _y))
+#define PUT_VALUE(_x, _y, _v) *(volatile GLuint *)(GET_PTR(_x, _y)) = (_v)
+#endif /* GET_VALUE */
+
 # define INIT_MONO_PIXEL(p, color)                       \
     p = PACK_COLOR_8888(color[3], color[0], color[1], color[2]) 

 # define WRITE_RGBA(_x, _y, r, g, b, a)                                 \
-    do {                                                                \
-       GLuint * _p = (GLuint *) GET_PTR(_x, _y);                        \
-       _p[0] = ((r << 16) | (g << 8) | (b << 0) | (a << 24));           \
-    } while(0)
+   PUT_VALUE(_x, _y, ((r << 16) |					\
+		      (g << 8) |					\
+		      (b << 0) |					\
+		      (a << 24)))

-#define WRITE_PIXEL(_x, _y, p)                                          \
-    do {                                                                \
-       GLuint * _p = (GLuint *) GET_PTR(_x, _y);                        \
-       _p[0] = p;                                                       \
-    } while(0)
+#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)

 # if defined( USE_X86_ASM )
 #  define READ_RGBA(rgba, _x, _y)                                       \
    do {                                                                \
-        GLuint p = *(volatile GLuint *) GET_PTR(_x, _y);                \
+       GLuint p = GET_VALUE(_x, _y);					\
       __asm__ __volatile__( "bswap	%0; rorl $8, %0"                \
 				: "=r" (p) : "0" (p) );                 \
       ((GLuint *)rgba)[0] = p;                                         \
@ -123,14 +122,14 @@
     */
 #  define READ_RGBA( rgba, _x, _y )				        \
     do {								\
-        GLuint p = *(volatile GLuint *) GET_PTR(_x, _y);                \
+        GLuint p = GET_VALUE(_x, _y);					\
        GLuint t = p;                                                   \
        *((uint32_t *) rgba) = (t >> 24) | (p << 8);                    \
     } while (0)
 # else
 #  define READ_RGBA( rgba, _x, _y )				        \
     do {								\
-        GLuint p = *(volatile GLuint *) GET_PTR(_x, _y);                \
+        GLuint p = GET_VALUE(_x, _y);					\
 	rgba[0] = (p >> 16) & 0xff;					\
 	rgba[1] = (p >>  8) & 0xff;					\
 	rgba[2] = (p >>  0) & 0xff;					\
@ -389,7 +388,8 @@ static void TAG(ReadRGBASpan)( GLcontext *ctx,
 }


-#if defined(USE_MMX_ASM) && \
+#if defined(GET_PTR) && \
+   defined(USE_MMX_ASM) && \
   (((SPANTMP_PIXEL_FMT == GL_BGRA) && \
 	(SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \
    ((SPANTMP_PIXEL_FMT == GL_RGB) && \
@ -440,7 +440,8 @@ static void TAG2(ReadRGBASpan,_MMX)( GLcontext *ctx,
 #endif


-#if defined(USE_SSE_ASM) && \
+#if defined(GET_PTR) &&	\
+   defined(USE_SSE_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
 static void TAG2(ReadRGBASpan,_SSE2)( GLcontext *ctx,
@ -474,7 +475,8 @@ static void TAG2(ReadRGBASpan,_SSE2)( GLcontext *ctx,
 }
 #endif

-#if defined(USE_SSE_ASM) && \
+#if defined(GET_PTR) &&	\
+   defined(USE_SSE_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
 static void TAG2(ReadRGBASpan,_SSE)( GLcontext *ctx,
@ -567,6 +569,7 @@ static void TAG(InitPointers)(struct gl_renderbuffer *rb)
   rb->PutMonoValues = TAG(WriteMonoRGBAPixels);
   rb->GetValues = TAG(ReadRGBAPixels);

+#if defined(GET_PTR)
 #if defined(USE_SSE_ASM) && \
   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
@ -596,6 +599,7 @@ static void TAG(InitPointers)(struct gl_renderbuffer *rb)
   }
   else
 #endif
+#endif /* GET_PTR */
   {
      if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "C" );
      rb->GetRow = TAG(ReadRGBASpan);
@ -610,6 +614,8 @@ static void TAG(InitPointers)(struct gl_renderbuffer *rb)
 #undef READ_RGBA
 #undef TAG
 #undef TAG2
+#undef GET_VALUE
+#undef PUT_VALUE
 #undef GET_PTR
 #undef SPANTMP_PIXEL_FMT
 #undef SPANTMP_PIXEL_TYPE
--- a/src/mesa/drivers/dri/i915/Makefile
+++ b/src/mesa/drivers/dri/i915/Makefile
@ -30,8 +30,8 @@ DRIVER_SOURCES = \
 	intel_pixel.c \
 	intel_pixel_bitmap.c \
 	intel_pixel_copy.c \
-	intel_pixel_read.c \
 	intel_pixel_draw.c \
+	intel_pixel_read.c \
 	intel_buffers.c \
 	intel_blit.c \
 	i915_tex.c \
@ -52,12 +52,10 @@ DRIVER_SOURCES = \
 	intel_state.c \
 	intel_tris.c \
 	intel_fbo.c \
-	intel_depthstencil.c \
-	intel_bufmgr_ttm.c
+	intel_depthstencil.c

 C_SOURCES = \
 	$(COMMON_SOURCES) \
-	$(COMMON_BM_SOURCES) \
 	$(DRIVER_SOURCES)

 ASM_SOURCES = 
@ -69,6 +67,7 @@ DRIVER_DEFINES = -I../intel -I../intel/server -DI915 \
 include ../Makefile.template

 intel_decode.o: ../intel/intel_decode.c
+
 intel_tex_layout.o: ../intel/intel_tex_layout.c

 symlinks:
--- a/src/mesa/drivers/dri/i915/i830_context.c
+++ b/src/mesa/drivers/dri/i915/i830_context.c
@ -81,6 +81,9 @@ i830CreateContext(const __GLcontextModes * mesaVis,
   _tnl_destroy_pipeline(ctx);
   _tnl_install_pipeline(ctx, intel_pipeline);

+   if (intel->no_rast)
+      FALLBACK(intel, INTEL_FALLBACK_USER, 1);
+
   intel->ctx.Const.MaxTextureUnits = I830_TEX_UNITS;
   intel->ctx.Const.MaxTextureImageUnits = I830_TEX_UNITS;
   intel->ctx.Const.MaxTextureCoordUnits = I830_TEX_UNITS;
--- a/src/mesa/drivers/dri/i915/i830_reg.h
+++ b/src/mesa/drivers/dri/i915/i830_reg.h
@ -494,10 +494,6 @@
 #define VFT1_TEX0_FMT(x)	(x)
 #define VFT1_TEX0_MASK          3
 #define VFT1_TEX1_SHIFT         2
-#define TEXCOORDFMT_2D		0
-#define TEXCOORDFMT_3D		1
-#define TEXCOORDFMT_4D		2
-#define TEXCOORDFMT_1D		3

 /*New stuff picked up along the way */

@ -635,8 +631,4 @@
 #define ENABLE_TEX_STREAM_MAP_IDX	(1<<3)
 #define TEX_STREAM_MAP_IDX(x)		(x)

-
-#define MI_FLUSH           ((0<<29)|(4<<23))
-#define FLUSH_MAP_CACHE    (1<<0)
-
 #endif
--- a/src/mesa/drivers/dri/i915/i830_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i830_vtbl.c
@ -31,6 +31,7 @@
 #include "i830_reg.h"
 #include "intel_batchbuffer.h"
 #include "intel_regions.h"
+#include "intel_tris.h"
 #include "tnl/t_context.h"
 #include "tnl/t_vertex.h"

@ -419,10 +420,12 @@ i830_emit_state(struct intel_context *intel)
 {
   struct i830_context *i830 = i830_context(&intel->ctx);
   struct i830_hw_state *state = i830->current;
-   int i, ret, count;
+   int i, count;
   GLuint dirty;
   GET_CURRENT_CONTEXT(ctx);
   BATCH_LOCALS;
+   dri_bo *aper_array[3 + I830_TEX_UNITS];
+   int aper_count;

   /* We don't hold the lock at this point, so want to make sure that
    * there won't be a buffer wrap between the state emits and the primitive
@ -435,26 +438,28 @@ i830_emit_state(struct intel_context *intel)
    * Set the space as LOOP_CLIPRECTS now, since that's what our primitives
    * will be emitted under.
    */
-   intel_batchbuffer_require_space(intel->batch, get_state_size(state) + 8,
+   intel_batchbuffer_require_space(intel->batch,
+				   get_state_size(state) + INTEL_PRIM_EMIT_SIZE,
 				   LOOP_CLIPRECTS);
   count = 0;
 again:
+   aper_count = 0;
   dirty = get_dirty(state);

-   ret = 0;
+   aper_array[aper_count++] = intel->batch->buf;
   if (dirty & I830_UPLOAD_BUFFERS) {
-     ret |= dri_bufmgr_check_aperture_space(state->draw_region->buffer);
-     ret |= dri_bufmgr_check_aperture_space(state->depth_region->buffer);
+      aper_array[aper_count++] = state->draw_region->buffer;
+      aper_array[aper_count++] = state->depth_region->buffer;
   }
-   
+
   for (i = 0; i < I830_TEX_UNITS; i++)
     if (dirty & I830_UPLOAD_TEX(i)) {
 	if (state->tex_buffer[i]) {
-	  ret |= dri_bufmgr_check_aperture_space(state->tex_buffer[i]);
+	   aper_array[aper_count++] = state->tex_buffer[i];
 	}
     }

-   if (ret) {
+   if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) {
       if (count == 0) {
 	   count++;
 	   intel_batchbuffer_flush(intel->batch);
@ -490,14 +495,14 @@ i830_emit_state(struct intel_context *intel)
      OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR0]);
      OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR1]);
      OUT_RELOC(state->draw_region->buffer,
-                DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                state->draw_region->draw_offset);

      if (state->depth_region) {
         OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR0]);
         OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR1]);
         OUT_RELOC(state->depth_region->buffer,
-                   DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   state->depth_region->draw_offset);
      }

@ -524,7 +529,7 @@ i830_emit_state(struct intel_context *intel)

         if (state->tex_buffer[i]) {
            OUT_RELOC(state->tex_buffer[i],
-                      DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		      I915_GEM_DOMAIN_SAMPLER, 0,
                      state->tex_offset[i] | TM0S0_USE_FENCE);
         }
 	 else if (state == &i830->meta) {
@ -717,4 +722,5 @@ i830InitVtbl(struct i830_context *i830)
   i830->intel.vtbl.render_prevalidate = i830_render_prevalidate;
   i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty;
   i830->intel.vtbl.note_unlock = i830_note_unlock; 
+   i830->intel.vtbl.finish_batch = intel_finish_vb;
 }
--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@ -138,6 +138,9 @@ i915CreateContext(const __GLcontextModes * mesaVis,
   _tnl_destroy_pipeline(ctx);
   _tnl_install_pipeline(ctx, intel_pipeline);

+   if (intel->no_rast)
+      FALLBACK(intel, INTEL_FALLBACK_USER, 1);
+
   ctx->Const.MaxTextureUnits = I915_TEX_UNITS;
   ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS;
   ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS;
--- a/src/mesa/drivers/dri/i915/i915_reg.h
+++ b/src/mesa/drivers/dri/i915/i915_reg.h
@ -325,118 +325,6 @@
 #define SCISSOR_RECT_0_YMAX(x)		((x)<<16)
 #define SCISSOR_RECT_0_XMAX(x)		(x)

-/* p189 */
-#define _3DSTATE_LOAD_STATE_IMMEDIATE_1   ((0x3<<29)|(0x1d<<24)|(0x04<<16))
-#define I1_LOAD_S(n)                      (1<<(4+n))
-
-#define S0_VB_OFFSET_MASK              0xffffffc
-#define S0_AUTO_CACHE_INV_DISABLE      (1<<0)
-
-#define S1_VERTEX_WIDTH_SHIFT          24
-#define S1_VERTEX_WIDTH_MASK           (0x3f<<24)
-#define S1_VERTEX_PITCH_SHIFT          16
-#define S1_VERTEX_PITCH_MASK           (0x3f<<16)
-
-#define TEXCOORDFMT_2D                 0x0
-#define TEXCOORDFMT_3D                 0x1
-#define TEXCOORDFMT_4D                 0x2
-#define TEXCOORDFMT_1D                 0x3
-#define TEXCOORDFMT_2D_16              0x4
-#define TEXCOORDFMT_4D_16              0x5
-#define TEXCOORDFMT_NOT_PRESENT        0xf
-#define S2_TEXCOORD_FMT0_MASK            0xf
-#define S2_TEXCOORD_FMT1_SHIFT           4
-#define S2_TEXCOORD_FMT(unit, type)    ((type)<<(unit*4))
-#define S2_TEXCOORD_NONE               (~0)
-
-/* S3 not interesting */
-
-#define S4_POINT_WIDTH_SHIFT           23
-#define S4_POINT_WIDTH_MASK            (0x1ff<<23)
-#define S4_LINE_WIDTH_SHIFT            19
-#define S4_LINE_WIDTH_ONE              (0x2<<19)
-#define S4_LINE_WIDTH_MASK             (0xf<<19)
-#define S4_FLATSHADE_ALPHA             (1<<18)
-#define S4_FLATSHADE_FOG               (1<<17)
-#define S4_FLATSHADE_SPECULAR          (1<<16)
-#define S4_FLATSHADE_COLOR             (1<<15)
-#define S4_CULLMODE_BOTH	       (0<<13)
-#define S4_CULLMODE_NONE	       (1<<13)
-#define S4_CULLMODE_CW		       (2<<13)
-#define S4_CULLMODE_CCW		       (3<<13)
-#define S4_CULLMODE_MASK	       (3<<13)
-#define S4_VFMT_POINT_WIDTH            (1<<12)
-#define S4_VFMT_SPEC_FOG               (1<<11)
-#define S4_VFMT_COLOR                  (1<<10)
-#define S4_VFMT_DEPTH_OFFSET           (1<<9)
-#define S4_VFMT_XYZ     	       (1<<6)
-#define S4_VFMT_XYZW     	       (2<<6)
-#define S4_VFMT_XY     		       (3<<6)
-#define S4_VFMT_XYW     	       (4<<6)
-#define S4_VFMT_XYZW_MASK              (7<<6)
-#define S4_FORCE_DEFAULT_DIFFUSE       (1<<5)
-#define S4_FORCE_DEFAULT_SPECULAR      (1<<4)
-#define S4_LOCAL_DEPTH_OFFSET_ENABLE   (1<<3)
-#define S4_VFMT_FOG_PARAM              (1<<2)
-#define S4_SPRITE_POINT_ENABLE         (1<<1)
-#define S4_LINE_ANTIALIAS_ENABLE       (1<<0)
-
-#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH   | 	\
-		      S4_VFMT_SPEC_FOG      |	\
-		      S4_VFMT_COLOR         |	\
-		      S4_VFMT_DEPTH_OFFSET  |	\
-		      S4_VFMT_XYZW_MASK     |	\
-		      S4_VFMT_FOG_PARAM)
-
-
-#define S5_WRITEDISABLE_ALPHA          (1<<31)
-#define S5_WRITEDISABLE_RED            (1<<30)
-#define S5_WRITEDISABLE_GREEN          (1<<29)
-#define S5_WRITEDISABLE_BLUE           (1<<28)
-#define S5_WRITEDISABLE_MASK           (0xf<<28)
-#define S5_FORCE_DEFAULT_POINT_SIZE    (1<<27)
-#define S5_LAST_PIXEL_ENABLE           (1<<26)
-#define S5_GLOBAL_DEPTH_OFFSET_ENABLE  (1<<25)
-#define S5_FOG_ENABLE                  (1<<24)
-#define S5_STENCIL_REF_SHIFT           16
-#define S5_STENCIL_REF_MASK            (0xff<<16)
-#define S5_STENCIL_TEST_FUNC_SHIFT     13
-#define S5_STENCIL_TEST_FUNC_MASK      (0x7<<13)
-#define S5_STENCIL_FAIL_SHIFT          10
-#define S5_STENCIL_FAIL_MASK           (0x7<<10)
-#define S5_STENCIL_PASS_Z_FAIL_SHIFT   7
-#define S5_STENCIL_PASS_Z_FAIL_MASK    (0x7<<7)
-#define S5_STENCIL_PASS_Z_PASS_SHIFT   4
-#define S5_STENCIL_PASS_Z_PASS_MASK    (0x7<<4)
-#define S5_STENCIL_WRITE_ENABLE        (1<<3)
-#define S5_STENCIL_TEST_ENABLE         (1<<2)
-#define S5_COLOR_DITHER_ENABLE         (1<<1)
-#define S5_LOGICOP_ENABLE              (1<<0)
-
-
-#define S6_ALPHA_TEST_ENABLE           (1<<31)
-#define S6_ALPHA_TEST_FUNC_SHIFT       28
-#define S6_ALPHA_TEST_FUNC_MASK        (0x7<<28)
-#define S6_ALPHA_REF_SHIFT             20
-#define S6_ALPHA_REF_MASK              (0xff<<20)
-#define S6_DEPTH_TEST_ENABLE           (1<<19)
-#define S6_DEPTH_TEST_FUNC_SHIFT       16
-#define S6_DEPTH_TEST_FUNC_MASK        (0x7<<16)
-#define S6_CBUF_BLEND_ENABLE           (1<<15)
-#define S6_CBUF_BLEND_FUNC_SHIFT       12
-#define S6_CBUF_BLEND_FUNC_MASK        (0x7<<12)
-#define S6_CBUF_SRC_BLEND_FACT_SHIFT   8
-#define S6_CBUF_SRC_BLEND_FACT_MASK    (0xf<<8)
-#define S6_CBUF_DST_BLEND_FACT_SHIFT   4
-#define S6_CBUF_DST_BLEND_FACT_MASK    (0xf<<4)
-#define S6_DEPTH_WRITE_ENABLE          (1<<3)
-#define S6_COLOR_WRITE_ENABLE          (1<<2)
-#define S6_TRISTRIP_PV_SHIFT           0
-#define S6_TRISTRIP_PV_MASK            (0x3<<0)
-
-#define S7_DEPTH_OFFSET_CONST_MASK     ~0
-
-
 /* Helper macros for blend factors
 */
 #define DST_BLND_FACT(f) ((f)<<S6_CBUF_DST_BLEND_FACT_SHIFT)
@ -855,10 +743,4 @@
 #define _3DSTATE_DEFAULT_DIFFUSE    ((0x3<<29)|(0x1d<<24)|(0x99<<16))
 #define _3DSTATE_DEFAULT_SPECULAR   ((0x3<<29)|(0x1d<<24)|(0x9a<<16))

-
-#define MI_FLUSH                   ((0<<29)|(4<<23))
-#define FLUSH_MAP_CACHE            (1<<0)
-#define INHIBIT_FLUSH_RENDER_CACHE (1<<2)
-
-
 #endif
--- a/src/mesa/drivers/dri/i915/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i915_vtbl.c
@ -39,6 +39,7 @@
 #include "intel_batchbuffer.h"
 #include "intel_tex.h"
 #include "intel_regions.h"
+#include "intel_tris.h"

 #include "i915_reg.h"
 #include "i915_context.h"
@ -296,9 +297,9 @@ i915_emit_state(struct intel_context *intel)
 {
   struct i915_context *i915 = i915_context(&intel->ctx);
   struct i915_hw_state *state = i915->current;
-   int i;
-   int ret, count;
+   int i, count, aper_count;
   GLuint dirty;
+   dri_bo *aper_array[3 + I915_TEX_UNITS];
   GET_CURRENT_CONTEXT(ctx);
   BATCH_LOCALS;

@ -313,28 +314,32 @@ i915_emit_state(struct intel_context *intel)
    * Set the space as LOOP_CLIPRECTS now, since that's what our primitives
    * will be emitted under.
    */
-   intel_batchbuffer_require_space(intel->batch, get_state_size(state) + 8,
+   intel_batchbuffer_require_space(intel->batch,
+				   get_state_size(state) + INTEL_PRIM_EMIT_SIZE,
 				   LOOP_CLIPRECTS);
   count = 0;
 again:
+   aper_count = 0;
   dirty = get_dirty(state);

-   ret = 0;
+   aper_array[aper_count++] = intel->batch->buf;
   if (dirty & I915_UPLOAD_BUFFERS) {
-     ret |= dri_bufmgr_check_aperture_space(state->draw_region->buffer);
-     if (state->depth_region)
-        ret |= dri_bufmgr_check_aperture_space(state->depth_region->buffer);
+      aper_array[aper_count++] = state->draw_region->buffer;
+      if (state->depth_region)
+	 aper_array[aper_count++] = state->depth_region->buffer;
   }

   if (dirty & I915_UPLOAD_TEX_ALL) {
-     for (i = 0; i < I915_TEX_UNITS; i++)
-       if (dirty & I915_UPLOAD_TEX(i)) {
-	   if (state->tex_buffer[i]) {
-	       ret |= dri_bufmgr_check_aperture_space(state->tex_buffer[i]);
-	   }
-       }
+      for (i = 0; i < I915_TEX_UNITS; i++) {
+	 if (dirty & I915_UPLOAD_TEX(i)) {
+	    if (state->tex_buffer[i]) {
+	       aper_array[aper_count++] = state->tex_buffer[i];
+	    }
+	 }
+      }
   }
-   if (ret) {
+
+   if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) {
       if (count == 0) {
 	   count++;
 	   intel_batchbuffer_flush(intel->batch);
@ -377,14 +382,14 @@ i915_emit_state(struct intel_context *intel)
      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]);
      OUT_RELOC(state->draw_region->buffer,
-                DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                state->draw_region->draw_offset);

      if (state->depth_region) {
         OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]);
         OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]);
         OUT_RELOC(state->depth_region->buffer,
-                   DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   state->depth_region->draw_offset);
      }

@ -427,7 +432,7 @@ i915_emit_state(struct intel_context *intel)

            if (state->tex_buffer[i]) {
               OUT_RELOC(state->tex_buffer[i],
-                         DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+			 I915_GEM_DOMAIN_SAMPLER, 0,
                         state->tex_offset[i]);
            }
            else if (state == &i915->meta) {
@ -629,4 +634,5 @@ i915InitVtbl(struct i915_context *i915)
   i915->intel.vtbl.flush_cmd = i915_flush_cmd;
   i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty;
   i915->intel.vtbl.note_unlock = i915_note_unlock; 
+   i915->intel.vtbl.finish_batch = intel_finish_vb;
 }
--- a/src/mesa/drivers/dri/i915/intel_bufmgr_ttm.c
+++ b/src/mesa/drivers/dri/i915/intel_bufmgr_ttm.c
@ -1 +0,0 @@
-../intel/intel_bufmgr_ttm.c
--- a/src/mesa/drivers/dri/i915/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i915/intel_pixel_read.c
@ -173,7 +173,6 @@ do_blit_readpixels(GLcontext * ctx,
   struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj);
   GLuint dst_offset;
   GLuint rowLength;
-   dri_fence *fence = NULL;

   if (INTEL_DEBUG & DEBUG_PIXEL)
      _mesa_printf("%s\n", __FUNCTION__);
@ -264,7 +263,7 @@ do_blit_readpixels(GLcontext * ctx,

         intelEmitCopyBlit(intel,
                           src->cpp,
-                           src->pitch, src->buffer, 0, src->tiled,
+                           src->pitch, src->buffer, 0, src->tiling,
                           rowLength, dst_buffer, dst_offset, GL_FALSE,
                           rect.x1,
                           rect.y1,
@ -273,19 +272,9 @@ do_blit_readpixels(GLcontext * ctx,
                           rect.x2 - rect.x1, rect.y2 - rect.y1,
 			   GL_COPY);
      }
-
-      intel_batchbuffer_flush(intel->batch);
-      fence = intel->batch->last_fence;
-      dri_fence_reference(fence);
-
   }
   UNLOCK_HARDWARE(intel);

-   if (fence) {
-      dri_fence_wait(fence);
-      dri_fence_unreference(fence);
-   }
-
   if (INTEL_DEBUG & DEBUG_PIXEL)
      _mesa_printf("%s - DONE\n", __FUNCTION__);

--- a/src/mesa/drivers/dri/i915/intel_render.c
+++ b/src/mesa/drivers/dri/i915/intel_render.c
@ -67,7 +67,7 @@

 #define HAVE_ELTS        0

-static GLuint hw_prim[GL_POLYGON + 1] = {
+static uint32_t hw_prim[GL_POLYGON + 1] = {
   0,
   PRIM3D_LINELIST,
   PRIM3D_LINESTRIP,
@ -114,7 +114,7 @@ intelDmaPrimitive(struct intel_context *intel, GLenum prim)
      fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim));
   INTEL_FIREVERTICES(intel);
   intel->vtbl.reduced_primitive_state(intel, reduced_prim[prim]);
-   intelStartInlinePrimitive(intel, hw_prim[prim], LOOP_CLIPRECTS);
+   intel_set_prim(intel, hw_prim[prim]);
 }


@ -126,12 +126,11 @@ do {						\

 #define FLUSH() INTEL_FIREVERTICES(intel)

-#define GET_SUBSEQUENT_VB_MAX_VERTS() \
-  ((intel->batch->size - 1500) / (intel->vertex_size*4))
-#define GET_CURRENT_VB_MAX_VERTS() GET_SUBSEQUENT_VB_MAX_VERTS()
+#define GET_SUBSEQUENT_VB_MAX_VERTS() (INTEL_VB_SIZE / (intel->vertex_size * 4))
+#define GET_CURRENT_VB_MAX_VERTS() \
+   ((INTEL_VB_SIZE - intel->prim.current_offset) / (intel->vertex_size * 4))

-#define ALLOC_VERTS( nr ) \
-   intelExtendInlinePrimitive( intel, (nr) * intel->vertex_size )
+#define ALLOC_VERTS(nr) intel_get_prim_space(intel, nr)

 #define EMIT_VERTS( ctx, j, nr, buf ) \
  _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf )
--- a/src/mesa/drivers/dri/i915/intel_tris.c
+++ b/src/mesa/drivers/dri/i915/intel_tris.c
@ -25,6 +25,12 @@
 * 
 **************************************************************************/

+/** @file intel_tris.c
+ *
+ * This file contains functions for managing the vertex buffer and emitting
+ * primitives into it.
+ */
+
 #include "glheader.h"
 #include "context.h"
 #include "macros.h"
@ -47,111 +53,185 @@
 #include "intel_reg.h"
 #include "intel_span.h"
 #include "intel_tex.h"
+#include "intel_chipset.h"
+#include "i830_context.h"
+#include "i830_reg.h"

 static void intelRenderPrimitive(GLcontext * ctx, GLenum prim);
 static void intelRasterPrimitive(GLcontext * ctx, GLenum rprim,
                                 GLuint hwprim);

-/*
- */
-static void
-intel_flush_inline_primitive(struct intel_context *intel)
+/** Sets the primitive type for a primitive sequence, flushing as needed. */
+void intel_set_prim(struct intel_context *intel, uint32_t prim)
 {
-   GLuint used = intel->batch->ptr - intel->prim.start_ptr;
-
-   assert(intel->prim.primitive != ~0);
-
-/*    _mesa_printf("/\n"); */
-
-   if (used < 8)
-      goto do_discard;
-
-   *(int *) intel->prim.start_ptr = (_3DPRIMITIVE |
-                                     intel->prim.primitive | (used / 4 - 2));
-
-   goto finished;
-
- do_discard:
-   intel->batch->ptr -= used;
-
- finished:
-   intel->prim.primitive = ~0;
-   intel->prim.start_ptr = 0;
-   intel->prim.flush = 0;
+   if (prim != intel->prim.primitive) {
+      INTEL_FIREVERTICES(intel);
+      intel->prim.primitive = prim;
+   }
 }

+/** Returns CPU-side VB staging space for the given number of vertices */
+uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count)
+{
+   uint32_t *addr;

-/* Emit a primitive referencing vertices in a vertex buffer.
- */
-void
-intelStartInlinePrimitive(struct intel_context *intel,
-                          GLuint prim, GLuint batch_flags)
+   /* Check for space in the existing VB */
+   if (intel->prim.vb_bo == NULL ||
+       (intel->prim.current_offset +
+	count * intel->vertex_size * 4) > INTEL_VB_SIZE ||
+       (intel->prim.count + count) >= (1 << 16)) {
+      /* Flush existing prim if any */
+      INTEL_FIREVERTICES(intel);
+
+      intel_finish_vb(intel);
+
+      /* Start a new VB */
+      if (intel->prim.vb == NULL)
+	 intel->prim.vb = malloc(INTEL_VB_SIZE);
+      intel->prim.vb_bo = dri_bo_alloc(intel->bufmgr, "vb",
+				       INTEL_VB_SIZE, 4);
+      intel->prim.start_offset = 0;
+      intel->prim.current_offset = 0;
+   }
+
+   intel->prim.flush = intel_flush_prim;
+
+   addr = (uint32_t *)(intel->prim.vb + intel->prim.current_offset);
+   intel->prim.current_offset += intel->vertex_size * 4 * count;
+   intel->prim.count += count;
+
+   return addr;
+}
+
+/** Dispatches the accumulated primitive to the batchbuffer. */
+void intel_flush_prim(struct intel_context *intel)
 {
   BATCH_LOCALS;
+   dri_bo *aper_array[2];
+   dri_bo *vb_bo;
+
+   /* Must be called after an intel_set_prim. */
+   assert(intel->prim.primitive != ~0);
+
+   if (intel->prim.count == 0)
+      return;
+
+   /* Keep a reference on the BO as it may get finished as we start the
+    * batch emit.
+    */
+   vb_bo = intel->prim.vb_bo;
+   dri_bo_reference(vb_bo);

   intel_wait_flips(intel);

   intel->vtbl.emit_state(intel);

+   aper_array[0] = intel->batch->buf;
+   aper_array[1] = vb_bo;
+   if (dri_bufmgr_check_aperture_space(aper_array, 2)) {
+      intel_batchbuffer_flush(intel->batch);
+      intel->vtbl.emit_state(intel);
+   }
+
+   /* Ensure that we don't start a new batch for the following emit, which
+    * depends on the state just emitted. emit_state should be making sure we
+    * have the space for this.
+    */
   intel->no_batch_wrap = GL_TRUE;

-/*    _mesa_printf("%s *", __progname); */
-
-   /* Emit a slot which will be filled with the inline primitive
-    * command later.
+   /* Check that we actually emitted the state into this batch, using the
+    * UPLOAD_CTX bit as the signal.
    */
-   BEGIN_BATCH(2, batch_flags);
-   OUT_BATCH(0);
-
   assert((intel->batch->dirty_state & (1<<1)) == 0);

-   intel->prim.start_ptr = intel->batch->ptr;
-   intel->prim.primitive = prim;
-   intel->prim.flush = intel_flush_inline_primitive;
+#if 0
+   printf("emitting %d..%d=%d vertices size %d\n", intel->prim.start_offset,
+	  intel->prim.current_offset, intel->prim.count,
+	  intel->vertex_size * 4);
+#endif

-   OUT_BATCH(0);
-   ADVANCE_BATCH();
+   if (IS_9XX(intel->intelScreen->deviceID)) {
+      BEGIN_BATCH(5, LOOP_CLIPRECTS);
+      OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+		I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
+      assert((intel->prim.start_offset & ~S0_VB_OFFSET_MASK) == 0);
+      OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0,
+		intel->prim.start_offset);
+      OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) |
+		(intel->vertex_size << S1_VERTEX_PITCH_SHIFT));
+
+      OUT_BATCH(_3DPRIMITIVE |
+		PRIM_INDIRECT |
+		PRIM_INDIRECT_SEQUENTIAL |
+		intel->prim.primitive |
+		intel->prim.count);
+      OUT_BATCH(0); /* Beginning vertex index */
+      ADVANCE_BATCH();
+   } else {
+      struct i830_context *i830 = i830_context(&intel->ctx);
+
+      BEGIN_BATCH(5, LOOP_CLIPRECTS);
+      OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+		I1_LOAD_S(0) | I1_LOAD_S(2) | 1);
+      /* S0 */
+      assert((intel->prim.start_offset & ~S0_VB_OFFSET_MASK_830) == 0);
+      OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0,
+		intel->prim.start_offset |
+		(intel->vertex_size << S0_VB_PITCH_SHIFT_830) |
+		S0_VB_ENABLE_830);
+      /* S2
+       * This is somewhat unfortunate -- VB width is tied up with
+       * vertex format data that we've already uploaded through
+       * _3DSTATE_VFT[01]_CMD.  We may want to replace emits of VFT state with
+       * STATE_IMMEDIATE_1 like this to avoid duplication.
+       */
+      OUT_BATCH((i830->state.Ctx[I830_CTXREG_VF] & VFT0_TEX_COUNT_MASK) >>
+		VFT0_TEX_COUNT_SHIFT << S2_TEX_COUNT_SHIFT_830 |
+		(i830->state.Ctx[I830_CTXREG_VF2] << 16) |
+		intel->vertex_size << S2_VERTEX_0_WIDTH_SHIFT_830);
+
+      OUT_BATCH(_3DPRIMITIVE |
+		PRIM_INDIRECT |
+		PRIM_INDIRECT_SEQUENTIAL |
+		intel->prim.primitive |
+		intel->prim.count);
+      OUT_BATCH(0); /* Beginning vertex index */
+      ADVANCE_BATCH();
+   }

   intel->no_batch_wrap = GL_FALSE;

-/*    _mesa_printf(">"); */
+   intel->prim.flush = NULL;
+   intel->prim.start_offset = intel->prim.current_offset;
+   if (!IS_9XX(intel->intelScreen->deviceID))
+      intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128);
+   intel->prim.count = 0;
+
+   dri_bo_unreference(vb_bo);
 }

-
-void
-intelWrapInlinePrimitive(struct intel_context *intel)
+/**
+ * Uploads the locally-accumulated VB into the buffer object.
+ *
+ * This avoids us thrashing the cachelines in and out as the buffer gets
+ * filled, dispatched, then reused as the hardware completes rendering from it,
+ * and also lets us clflush less if we dispatch with a partially-filled VB.
+ *
+ * This is called normally from intel_get_prim_space() when we're finishing a
+ * BO, but also at batch flush time so that we don't try accessing the
+ * contents of a just-dispatched buffer.
+ */
+void intel_finish_vb(struct intel_context *intel)
 {
-   GLuint prim = intel->prim.primitive;
-   enum cliprect_mode cliprect_mode = intel->batch->cliprect_mode;
+   if (intel->prim.vb_bo == NULL)
+      return;

-   intel_flush_inline_primitive(intel);
-   intel_batchbuffer_flush(intel->batch);
-   intelStartInlinePrimitive(intel, prim, cliprect_mode);  /* ??? */
+   dri_bo_subdata(intel->prim.vb_bo, 0, intel->prim.start_offset,
+		  intel->prim.vb);
+   dri_bo_unreference(intel->prim.vb_bo);
+   intel->prim.vb_bo = NULL;
 }

-GLuint *
-intelExtendInlinePrimitive(struct intel_context *intel, GLuint dwords)
-{
-   GLuint sz = dwords * sizeof(GLuint);
-   GLuint *ptr;
-
-   assert(intel->prim.flush == intel_flush_inline_primitive);
-
-   if (intel_batchbuffer_space(intel->batch) < sz)
-      intelWrapInlinePrimitive(intel);
-
-/*    _mesa_printf("."); */
-
-   intel->vtbl.assert_not_dirty(intel);
-
-   ptr = (GLuint *) intel->batch->ptr;
-   intel->batch->ptr += sz;
-
-   return ptr;
-}
-
-
-
 /***********************************************************************
 *                    Emit primitives as inline vertices               *
 ***********************************************************************/
@ -182,7 +262,7 @@ intel_draw_quad(struct intel_context *intel,
                intelVertexPtr v1, intelVertexPtr v2, intelVertexPtr v3)
 {
   GLuint vertsize = intel->vertex_size;
-   GLuint *vb = intelExtendInlinePrimitive(intel, 6 * vertsize);
+   GLuint *vb = intel_get_prim_space(intel, 6);
   int j;

   COPY_DWORDS(j, vb, vertsize, v0);
@ -210,7 +290,7 @@ intel_draw_triangle(struct intel_context *intel,
                    intelVertexPtr v0, intelVertexPtr v1, intelVertexPtr v2)
 {
   GLuint vertsize = intel->vertex_size;
-   GLuint *vb = intelExtendInlinePrimitive(intel, 3 * vertsize);
+   GLuint *vb = intel_get_prim_space(intel, 3);
   int j;

   COPY_DWORDS(j, vb, vertsize, v0);
@ -224,7 +304,7 @@ intel_draw_line(struct intel_context *intel,
                intelVertexPtr v0, intelVertexPtr v1)
 {
   GLuint vertsize = intel->vertex_size;
-   GLuint *vb = intelExtendInlinePrimitive(intel, 2 * vertsize);
+   GLuint *vb = intel_get_prim_space(intel, 2);
   int j;

   COPY_DWORDS(j, vb, vertsize, v0);
@ -236,7 +316,7 @@ static void
 intel_draw_point(struct intel_context *intel, intelVertexPtr v0)
 {
   GLuint vertsize = intel->vertex_size;
-   GLuint *vb = intelExtendInlinePrimitive(intel, vertsize);
+   GLuint *vb = intel_get_prim_space(intel, 1);
   int j;

   /* Adjust for sub pixel position -- still required for conform. */
@ -745,7 +825,7 @@ intelFastRenderClippedPoly(GLcontext * ctx, const GLuint * elts, GLuint n)
 {
   struct intel_context *intel = intel_context(ctx);
   const GLuint vertsize = intel->vertex_size;
-   GLuint *vb = intelExtendInlinePrimitive(intel, (n - 2) * 3 * vertsize);
+   GLuint *vb = intel_get_prim_space(intel, (n - 2) * 3);
   GLubyte *vertptr = (GLubyte *) intel->verts;
   const GLuint *start = (const GLuint *) V(elts[0]);
   int i, j;
@ -950,7 +1030,7 @@ intelRasterPrimitive(GLcontext * ctx, GLenum rprim, GLuint hwprim)
   if (hwprim != intel->prim.primitive) {
      INTEL_FIREVERTICES(intel);

-      intelStartInlinePrimitive(intel, hwprim, LOOP_CLIPRECTS);
+      intel_set_prim(intel, hwprim);
   }
 }

@ -1083,15 +1163,18 @@ intel_meta_draw_poly(struct intel_context *intel,
   union fi *vb;
   GLint i;
   GLboolean was_locked = intel->locked;
+   unsigned int saved_vertex_size = intel->vertex_size;

   if (!was_locked)
       LOCK_HARDWARE(intel);

+   intel->vertex_size = 6;
+
   /* All 3d primitives should be emitted with LOOP_CLIPRECTS,
    * otherwise the drawing origin (DR4) might not be set correctly.
    */
-   intelStartInlinePrimitive(intel, PRIM3D_TRIFAN, LOOP_CLIPRECTS);
-   vb = (union fi *) intelExtendInlinePrimitive(intel, n * 6);
+   intel_set_prim(intel, PRIM3D_TRIFAN);
+   vb = (union fi *) intel_get_prim_space(intel, n);

   for (i = 0; i < n; i++) {
      vb[0].f = xy[i][0];
@ -1105,6 +1188,8 @@ intel_meta_draw_poly(struct intel_context *intel,

   INTEL_FIREVERTICES(intel);

+   intel->vertex_size = saved_vertex_size;
+
   if (!was_locked)
       UNLOCK_HARDWARE(intel);
 }
--- a/src/mesa/drivers/dri/i915/intel_tris.h
+++ b/src/mesa/drivers/dri/i915/intel_tris.h
@ -30,7 +30,9 @@

 #include "mtypes.h"

-
+#define INTEL_VB_SIZE		(32 * 1024)
+/** 3 dwords of state_immediate and 2 of 3dprim, in intel_flush_prim */
+#define INTEL_PRIM_EMIT_SIZE	(5 * 4)

 #define _INTEL_NEW_RENDERSTATE (_DD_NEW_LINE_STIPPLE |		\
 			       _DD_NEW_TRI_UNFILLED |		\
@ -44,11 +46,9 @@ extern void intelInitTriFuncs(GLcontext * ctx);

 extern void intelChooseRenderState(GLcontext * ctx);

-extern void intelStartInlinePrimitive(struct intel_context *intel,
-                                      GLuint prim, GLuint flags);
-extern void intelWrapInlinePrimitive(struct intel_context *intel);
-
-GLuint *intelExtendInlinePrimitive(struct intel_context *intel,
-                                   GLuint dwords);
+void intel_set_prim(struct intel_context *intel, uint32_t prim);
+uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count);
+void intel_flush_prim(struct intel_context *intel);
+void intel_finish_vb(struct intel_context *intel);

 #endif
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@ -9,7 +9,6 @@ DRIVER_SOURCES = \
 	intel_blit.c \
 	intel_buffer_objects.c \
 	intel_buffers.c \
-	intel_bufmgr_ttm.c \
 	intel_context.c \
 	intel_decode.c \
 	intel_depthstencil.c \
@ -85,7 +84,6 @@ DRIVER_SOURCES = \

 C_SOURCES = \
 	$(COMMON_SOURCES) \
-	$(COMMON_BM_SOURCES) \
 	$(MINIGLX_SOURCES) \
 	$(DRIVER_SOURCES)

--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@ -37,7 +37,7 @@
 #include "macros.h"
 #include "enums.h"

-static int upload_cc_vp( struct brw_context *brw )
+static void prepare_cc_vp( struct brw_context *brw )
 {
   struct brw_cc_viewport ccv;

@ -48,7 +48,6 @@ static int upload_cc_vp( struct brw_context *brw )

   dri_bo_unreference(brw->cc.vp_bo);
   brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
-   return dri_bufmgr_check_aperture_space(brw->cc.vp_bo);
 }

 const struct brw_tracked_state brw_cc_vp = {
@ -57,7 +56,7 @@ const struct brw_tracked_state brw_cc_vp = {
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
-   .prepare = upload_cc_vp
+   .prepare = prepare_cc_vp
 };

 struct brw_cc_unit_key {
@ -256,16 +255,17 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
 			 NULL, NULL);

   /* Emit CC viewport relocation */
-   dri_emit_reloc(bo,
-		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-		  0,
-		  offsetof(struct brw_cc_unit_state, cc4),
-		  brw->cc.vp_bo);
+   intel_bo_emit_reloc(bo,
+		       I915_GEM_DOMAIN_INSTRUCTION,
+		       0,
+		       0,
+		       offsetof(struct brw_cc_unit_state, cc4),
+		       brw->cc.vp_bo);

   return bo;
 }

-static int prepare_cc_unit( struct brw_context *brw )
+static void prepare_cc_unit( struct brw_context *brw )
 {
   struct brw_cc_unit_key key;

@ -279,7 +279,6 @@ static int prepare_cc_unit( struct brw_context *brw )

   if (brw->cc.state_bo == NULL)
      brw->cc.state_bo = cc_unit_create_from_key(brw, &key);
-   return dri_bufmgr_check_aperture_space(brw->cc.state_bo);
 }

 const struct brw_tracked_state brw_cc_unit = {
--- a/src/mesa/drivers/dri/i965/brw_clip.c
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@ -131,7 +131,7 @@ static void compile_clip_prog( struct brw_context *brw,

 /* Calculate interpolants for triangle and line rasterization.
 */
-static int upload_clip_prog( struct brw_context *brw )
+static void upload_clip_prog(struct brw_context *brw)
 {
   GLcontext *ctx = &brw->intel.ctx;
   struct brw_clip_prog_key key;
@ -242,8 +242,6 @@ static int upload_clip_prog( struct brw_context *brw )
 					&brw->clip.prog_data);
   if (brw->clip.prog_bo == NULL)
      compile_clip_prog( brw, &key );
-
-   return dri_bufmgr_check_aperture_space(brw->clip.prog_bo);
 }


--- a/src/mesa/drivers/dri/i965/brw_clip_state.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
@ -119,19 +119,19 @@ clip_unit_create_from_key(struct brw_context *brw,

   /* Emit clip program relocation */
   assert(brw->clip.prog_bo);
-   dri_emit_reloc(bo,
-		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-		  clip.thread0.grf_reg_count << 1,
-		  offsetof(struct brw_clip_unit_state, thread0),
-		  brw->clip.prog_bo);
+   intel_bo_emit_reloc(bo,
+		       I915_GEM_DOMAIN_INSTRUCTION,
+		       0,
+		       clip.thread0.grf_reg_count << 1,
+		       offsetof(struct brw_clip_unit_state, thread0),
+		       brw->clip.prog_bo);

   return bo;
 }

-static int upload_clip_unit( struct brw_context *brw )
+static void upload_clip_unit( struct brw_context *brw )
 {
   struct brw_clip_unit_key key;
-   int ret = 0;

   clip_unit_populate_key(brw, &key);

@ -143,9 +143,6 @@ static int upload_clip_unit( struct brw_context *brw )
   if (brw->clip.state_bo == NULL) {
      brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
   }
-
-   ret = dri_bufmgr_check_aperture_space(brw->clip.state_bo);
-   return ret;
 }

 const struct brw_tracked_state brw_clip_unit = {
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@ -134,7 +134,6 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
   ctx->Const.Max3DTextureLevels = 9;
   ctx->Const.MaxCubeTextureLevels = 12;
   ctx->Const.MaxTextureRectSize = (1<<11);
-   ctx->Const.MaxTextureUnits = BRW_MAX_TEX_UNIT;
   
 /*    ctx->Const.MaxNativeVertexProgramTemps = 32; */

--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@ -135,6 +135,8 @@ struct brw_context;
 #define BRW_NEW_METAOPS                 0x1000
 #define BRW_NEW_FENCE                   0x2000
 #define BRW_NEW_LOCK                    0x4000
+#define BRW_NEW_INDICES			0x8000
+#define BRW_NEW_VERTICES		0x10000
 /**
 * Used for any batch entry with a relocated pointer that will be used
 * by any 3D rendering.
@ -332,7 +334,7 @@ struct brw_state_pointers {
 */
 struct brw_tracked_state {
   struct brw_state_flags dirty;
-   int (*prepare)( struct brw_context *brw );
+   void (*prepare)( struct brw_context *brw );
   void (*emit)( struct brw_context *brw );
 };

@ -450,8 +452,21 @@ struct brw_context
       * for changes to this state:
       */
      struct brw_vertex_info info;
+      unsigned int min_index, max_index;
   } vb;

+   struct {
+      /**
+       * Index buffer for this draw_prims call.
+       *
+       * Updates are signaled by BRW_NEW_INDICES.
+       */
+      const struct _mesa_index_buffer *ib;
+
+      dri_bo *bo;
+      unsigned int offset;
+   } ib;
+
   struct {
      /* Will be allocated on demand if needed.   
       */
@ -641,7 +656,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
 /*======================================================================
 * brw_state.c
 */
-int brw_validate_state( struct brw_context *brw );
+void brw_validate_state( struct brw_context *brw );
 void brw_init_state( struct brw_context *brw );
 void brw_destroy_state( struct brw_context *brw );

--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@ -46,7 +46,7 @@

 /* Partition the CURBE between the various users of constant values:
 */
-static int calculate_curbe_offsets( struct brw_context *brw )
+static void calculate_curbe_offsets( struct brw_context *brw )
 {
   /* CACHE_NEW_WM_PROG */
   GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
@ -117,7 +117,6 @@ static int calculate_curbe_offsets( struct brw_context *brw )

      brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
   }
-   return 0;
 }


@ -156,19 +155,7 @@ void brw_upload_constant_buffer_state(struct brw_context *brw)

   assert(brw->urb.nr_cs_entries);
   BRW_CACHED_BATCH_STRUCT(brw, &cbs);
-}      
-
-#if 0
-const struct brw_tracked_state brw_constant_buffer_state = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_URB_FENCE,
-      .cache = 0
-   },
-   .update = brw_upload_constant_buffer_state
-};
-#endif
-
+}

 static GLfloat fixed_plane[6][4] = {
   { 0,    0,   -1, 1 },
@ -183,7 +170,7 @@ static GLfloat fixed_plane[6][4] = {
 * cache mechanism, but maybe would benefit from a comparison against
 * the current uploaded set of constants.
 */
-static int prepare_constant_buffer(struct brw_context *brw)
+static void prepare_constant_buffer(struct brw_context *brw)
 {
   GLcontext *ctx = &brw->intel.ctx;
   struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
@ -207,8 +194,8 @@ static int prepare_constant_buffer(struct brw_context *brw)
 	 brw->curbe.last_buf = NULL;
 	 brw->curbe.last_bufsz  = 0;
      }
-       
-      return 0;
+
+      return;
   }

   buf = (GLfloat *)malloc(bufsz);
@ -306,10 +293,7 @@ static int prepare_constant_buffer(struct brw_context *brw)
 	  * They're generally around 64b.
 	  */
 	 brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE",
-					    4096, 1 << 6,
-					    DRM_BO_FLAG_MEM_LOCAL |
-					    DRM_BO_FLAG_CACHED |
-					    DRM_BO_FLAG_CACHED_MAPPED);
+					    4096, 1 << 6);
 	 brw->curbe.curbe_next_offset = 0;
      }

@ -336,9 +320,6 @@ static int prepare_constant_buffer(struct brw_context *brw)
    * flushes as necessary when doublebuffering of CURBEs isn't
    * possible.
    */
-
-   /* check aperture space for this bo */
-   return dri_bufmgr_check_aperture_space(brw->curbe.curbe_bo);
 }


@ -346,6 +327,13 @@ static void emit_constant_buffer(struct brw_context *brw)
 {
   struct intel_context *intel = &brw->intel;
   GLuint sz = brw->curbe.total_size;
+   dri_bo *aper_array[] = {
+      brw->intel.batch->buf,
+      brw->curbe.curbe_bo,
+   };
+
+   if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)))
+      intel_batchbuffer_flush(intel->batch);

   BEGIN_BATCH(2, IGNORE_CLIPRECTS);
   if (sz == 0) {
@ -353,7 +341,8 @@ static void emit_constant_buffer(struct brw_context *brw)
      OUT_BATCH(0);
   } else {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
-      OUT_RELOC(brw->curbe.curbe_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+      OUT_RELOC(brw->curbe.curbe_bo,
+		I915_GEM_DOMAIN_INSTRUCTION, 0,
 		(sz - 1) + brw->curbe.curbe_offset);
   }
   ADVANCE_BATCH();
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@ -33,69 +33,6 @@
 #ifndef BRW_DEFINES_H
 #define BRW_DEFINES_H

-/*
- */
-#define MI_NOOP                              0x00
-#define MI_USER_INTERRUPT                    0x02
-#define MI_WAIT_FOR_EVENT                    0x03
-#define MI_FLUSH                             0x04
-#define MI_REPORT_HEAD                       0x07
-#define MI_ARB_ON_OFF                        0x08
-#define MI_BATCH_BUFFER_END                  0x0A
-#define MI_OVERLAY_FLIP                      0x11
-#define MI_LOAD_SCAN_LINES_INCL              0x12
-#define MI_LOAD_SCAN_LINES_EXCL              0x13
-#define MI_DISPLAY_BUFFER_INFO               0x14
-#define MI_SET_CONTEXT                       0x18
-#define MI_STORE_DATA_IMM                    0x20
-#define MI_STORE_DATA_INDEX                  0x21
-#define MI_LOAD_REGISTER_IMM                 0x22
-#define MI_STORE_REGISTER_MEM                0x24
-#define MI_BATCH_BUFFER_START                0x31
-
-#define MI_SYNCHRONOUS_FLIP                  0x0 
-#define MI_ASYNCHRONOUS_FLIP                 0x1
-
-#define MI_BUFFER_SECURE                     0x0 
-#define MI_BUFFER_NONSECURE                  0x1
-
-#define MI_ARBITRATE_AT_CHAIN_POINTS         0x0 
-#define MI_ARBITRATE_BETWEEN_INSTS           0x1
-#define MI_NO_ARBITRATION                    0x3 
-
-#define MI_CONDITION_CODE_WAIT_DISABLED      0x0
-#define MI_CONDITION_CODE_WAIT_0             0x1
-#define MI_CONDITION_CODE_WAIT_1             0x2
-#define MI_CONDITION_CODE_WAIT_2             0x3
-#define MI_CONDITION_CODE_WAIT_3             0x4
-#define MI_CONDITION_CODE_WAIT_4             0x5
-
-#define MI_DISPLAY_PIPE_A                    0x0
-#define MI_DISPLAY_PIPE_B                    0x1
-
-#define MI_DISPLAY_PLANE_A                   0x0 
-#define MI_DISPLAY_PLANE_B                   0x1
-#define MI_DISPLAY_PLANE_C                   0x2
-
-#define MI_STANDARD_FLIP                                 0x0
-#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD   0x1
-#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE     0x2
-#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER     0x3
-
-#define MI_PHYSICAL_ADDRESS                  0x0
-#define MI_VIRTUAL_ADDRESS                   0x1
-
-#define MI_BUFFER_MEMORY_MAIN                0x0 
-#define MI_BUFFER_MEMORY_GTT                 0x2
-#define MI_BUFFER_MEMORY_PER_PROCESS_GTT     0x3 
-
-#define MI_FLIP_CONTINUE                     0x0
-#define MI_FLIP_ON                           0x1
-#define MI_FLIP_OFF                          0x2
-
-#define MI_UNTRUSTED_REGISTER_SPACE          0x0
-#define MI_TRUSTED_REGISTER_SPACE            0x1
-
 /* 3D state:
 */
 #define _3DOP_3DSTATE_PIPELINED       0x0
@ -119,7 +56,6 @@
 #define _3DSTATE_LINE_STIPPLE                 0x08
 #define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP    0x09
 #define _3DCONTROL    0x00
-#define _3DPRIMITIVE  0x00

 #define PIPE_CONTROL_NOWRITE          0x00
 #define PIPE_CONTROL_WRITEIMMEDIATE   0x01
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@ -83,9 +83,8 @@ static const GLenum reduced_prim[GL_POLYGON+1] = {
 * programs be immune to the active primitive (ie. cope with all
 * possibilities).  That may not be realistic however.
 */
-static GLuint brw_set_prim(struct brw_context *brw, GLenum prim, GLboolean *need_flush)
+static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
 {
-   int ret;
   if (INTEL_DEBUG & DEBUG_PRIMS)
      _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));
   
@ -106,9 +105,7 @@ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim, GLboolean *need
 	 brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
      }

-      ret = brw_validate_state(brw);
-      if (ret)
-         *need_flush = GL_TRUE;
+      brw_validate_state(brw);
   }

   return hw_prim[prim];
@ -131,7 +128,6 @@ static void brw_emit_prim( struct brw_context *brw,

 {
   struct brw_3d_primitive prim_packet;
-   GLboolean need_flush = GL_FALSE;

   if (INTEL_DEBUG & DEBUG_PRIMS)
      _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), 
@ -140,7 +136,7 @@ static void brw_emit_prim( struct brw_context *brw,
   prim_packet.header.opcode = CMD_3D_PRIM;
   prim_packet.header.length = sizeof(prim_packet)/4 - 2;
   prim_packet.header.pad = 0;
-   prim_packet.header.topology = brw_set_prim(brw, prim->mode, &need_flush);
+   prim_packet.header.topology = brw_set_prim(brw, prim->mode);
   prim_packet.header.indexed = prim->indexed;

   prim_packet.verts_per_instance = trim(prim->mode, prim->count);
@ -149,12 +145,13 @@ static void brw_emit_prim( struct brw_context *brw,
   prim_packet.start_instance_location = 0;
   prim_packet.base_vert_location = 0;

+   /* Can't wrap here, since we rely on the validated state. */
+   brw->no_batch_wrap = GL_TRUE;
   if (prim_packet.verts_per_instance) {
      intel_batchbuffer_data( brw->intel.batch, &prim_packet,
 			      sizeof(prim_packet), LOOP_CLIPRECTS);
   }
-
-   assert(need_flush == GL_FALSE);
+   brw->no_batch_wrap = GL_FALSE;
 }

 static void brw_merge_inputs( struct brw_context *brw,
@ -258,10 +255,6 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
   struct brw_context *brw = brw_context(ctx);
   GLboolean retval = GL_FALSE;
   GLuint i;
-   GLuint ib_offset;
-   dri_bo *ib_bo;
-   GLboolean force_flush = GL_FALSE;
-   int ret;

   if (ctx->NewState)
      _mesa_update_state( ctx );
@ -271,7 +264,13 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
   /* Bind all inputs, derive varying and size information:
    */
   brw_merge_inputs( brw, arrays );
-      
+
+   brw->ib.ib = ib;
+   brw->state.dirty.brw |= BRW_NEW_INDICES;
+
+   brw->vb.min_index = min_index;
+   brw->vb.max_index = max_index;
+   brw->state.dirty.brw |= BRW_NEW_VERTICES;
   /* Have to validate state quite late.  Will rebuild tnl_program,
    * which depends on varying information.  
    * 
@ -294,29 +293,18 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
       * an upper bound of how much we might emit in a single
       * brw_try_draw_prims().
       */
-   flush:
-      if (force_flush)
-         brw->no_batch_wrap = GL_FALSE;
-
      if (intel->batch->ptr - intel->batch->map > intel->batch->size * 3 / 4
 	/* brw_emit_prim may change the cliprect_mode to LOOP_CLIPRECTS */
-	  || intel->batch->cliprect_mode != LOOP_CLIPRECTS || (force_flush == GL_TRUE))
+	  || intel->batch->cliprect_mode != LOOP_CLIPRECTS)
 	      intel_batchbuffer_flush(intel->batch);

-      force_flush = GL_FALSE;
-      brw->no_batch_wrap = GL_TRUE;
-
      /* Set the first primitive early, ahead of validate_state:
       */
-      brw_set_prim(brw, prim[0].mode, &force_flush);
+      brw_set_prim(brw, prim[0].mode);

      /* XXX:  Need to separate validate and upload of state.  
       */
-      ret = brw_validate_state( brw );
-      if (ret) {
-         force_flush = GL_TRUE;
-         goto flush;
-      }
+      brw_validate_state( brw );

      /* Various fallback checks:
       */
@ -326,31 +314,6 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
      if (check_fallbacks( brw, prim, nr_prims ))
 	 goto out;

-      /* need to account for index buffer and vertex buffer */
-      if (ib) {
-         ret = brw_prepare_indices( brw, ib , &ib_bo, &ib_offset);
-         if (ret) {
-            force_flush = GL_TRUE;
-            goto flush;
-         }
-      }
-
-      ret = brw_prepare_vertices( brw, min_index, max_index);
-      if (ret < 0)
-         goto out;
-
-      if (ret > 0) {
-         force_flush = GL_TRUE;
-         goto flush;
-      }
-	  
-      /* Upload index, vertex data: 
-       */
-      if (ib)
-	brw_emit_indices( brw, ib, ib_bo, ib_offset);
-
-      brw_emit_vertices( brw, min_index, max_index);
-
      for (i = 0; i < nr_prims; i++) {
 	 brw_emit_prim(brw, &prim[i]);
      }
@ -359,9 +322,6 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
   }

 out:
-
-   brw->no_batch_wrap = GL_FALSE;
-
   UNLOCK_HARDWARE(intel);

   if (!retval)
--- a/src/mesa/drivers/dri/i965/brw_draw.h
+++ b/src/mesa/drivers/dri/i965/brw_draw.h
@ -51,27 +51,4 @@ void brw_draw_destroy( struct brw_context *brw );
 void brw_init_current_values(GLcontext *ctx,
 			     struct gl_client_array *arrays);

-
-/* brw_draw_upload.c
- */
-int brw_prepare_indices( struct brw_context *brw,
-			 const struct _mesa_index_buffer *index_buffer,
-			 dri_bo **bo_return,
-			 GLuint *offset_return);
-
-void brw_emit_indices( struct brw_context *brw,
-		       const struct _mesa_index_buffer *index_buffer,
-		       dri_bo *bo,
-		       GLuint offset);
-
-int brw_prepare_vertices( struct brw_context *brw,
-			       GLuint min_index,
-			       GLuint max_index );
-
-void brw_emit_vertices( struct brw_context *brw,
-			       GLuint min_index,
-			       GLuint max_index );
-
-
-
 #endif
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@ -247,10 +247,7 @@ static void wrap_buffers( struct brw_context *brw,
   if (brw->vb.upload.bo != NULL)
      dri_bo_unreference(brw->vb.upload.bo);
   brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO",
-				    size, 1,
-				    DRM_BO_FLAG_MEM_LOCAL |
-				    DRM_BO_FLAG_CACHED |
-				    DRM_BO_FLAG_CACHED_MAPPED);
+				    size, 1);

   /* Set the internal VBO\ to no-backing-store.  We only use them as a
    * temporary within a brw_try_draw_prims while the lock is held.
@ -305,9 +302,7 @@ copy_array_to_vbo_array( struct brw_context *brw,
   dri_bo_unmap(element->bo);
 }

-int brw_prepare_vertices( struct brw_context *brw,
-			       GLuint min_index,
-			       GLuint max_index )
+static void brw_prepare_vertices(struct brw_context *brw)
 {
   GLcontext *ctx = &brw->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
@ -315,7 +310,8 @@ int brw_prepare_vertices( struct brw_context *brw,
   GLuint i;
   const unsigned char *ptr = NULL;
   GLuint interleave = 0;
-   int ret = 0;
+   unsigned int min_index = brw->vb.min_index;
+   unsigned int max_index = brw->vb.max_index;

   struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
   GLuint nr_enabled = 0;
@ -343,8 +339,10 @@ int brw_prepare_vertices( struct brw_context *brw,
    * cases with > 17 vertex attributes enabled, so it probably
    * isn't an issue at this point.
    */
-   if (nr_enabled >= BRW_VEP_MAX)
-       return -1;
+   if (nr_enabled >= BRW_VEP_MAX) {
+      intel->Fallback = 1;
+      return;
+   }

   for (i = 0; i < nr_enabled; i++) {
      struct brw_vertex_element *input = enabled[i];
@ -362,8 +360,6 @@ int brw_prepare_vertices( struct brw_context *brw,
 	 dri_bo_reference(input->bo);
 	 input->offset = (unsigned long)input->glarray->Ptr;
 	 input->stride = input->glarray->StrideB;
-
-	 ret |= dri_bufmgr_check_aperture_space(input->bo);
      } else {
 	 /* Queue the buffer object up to be uploaded in the next pass,
 	  * when we've decided if we're doing interleaved or not.
@ -372,7 +368,11 @@ int brw_prepare_vertices( struct brw_context *brw,
 	    /* Position array not properly enabled:
 	     */
-	    if (input->glarray->StrideB == 0)
-	      return -1;
+	    if (input->glarray->StrideB == 0) {
+	       /* The old -1 return made the caller skip the hardware draw; flag a fallback so emit is skipped. */
+	       intel->Fallback = 1;
+	       return;
+	    }

 	    interleave = input->glarray->StrideB;
 	    ptr = input->glarray->Ptr;
@ -404,7 +400,6 @@ int brw_prepare_vertices( struct brw_context *brw,
       */
      copy_array_to_vbo_array(brw, upload[0], interleave);

-      ret |= dri_bufmgr_check_aperture_space(upload[0]->bo);
      for (i = 1; i < nr_uploads; i++) {
 	 /* Then, just point upload[i] at upload[0]'s buffer. */
 	 upload[i]->stride = interleave;
@ -418,23 +413,11 @@ int brw_prepare_vertices( struct brw_context *brw,
      /* Upload non-interleaved arrays */
      for (i = 0; i < nr_uploads; i++) {
          copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size);
-          if (upload[i]->bo) {
-              ret |= dri_bufmgr_check_aperture_space(upload[i]->bo);
-          }
      }
   }
-
-
-   if (ret)
-     return 1;
-
-
-   return 0;
 }

-void brw_emit_vertices( struct brw_context *brw,
-                        GLuint min_index,
-                        GLuint max_index )
+static void brw_emit_vertices(struct brw_context *brw)
 {
   GLcontext *ctx = &brw->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
@ -470,9 +453,9 @@ void brw_emit_vertices( struct brw_context *brw,
 		BRW_VB0_ACCESS_VERTEXDATA |
 		(input->stride << BRW_VB0_PITCH_SHIFT));
      OUT_RELOC(input->bo,
-		DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+		I915_GEM_DOMAIN_VERTEX, 0,
 		input->offset);
-      OUT_BATCH(max_index);
+      OUT_BATCH(brw->vb.max_index);
      OUT_BATCH(0); /* Instance data step rate */

      /* Unreference the buffer so it can get freed, now that we won't
@ -516,18 +499,31 @@ void brw_emit_vertices( struct brw_context *brw,
   ADVANCE_BATCH();
 }

-int brw_prepare_indices( struct brw_context *brw,
-			 const struct _mesa_index_buffer *index_buffer,
-			 dri_bo **bo_return,
-			 GLuint *offset_return)
+const struct brw_tracked_state brw_vertices = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES,
+      .cache = 0,
+   },
+   .prepare = brw_prepare_vertices,
+   .emit = brw_emit_vertices,
+};
+
+static void brw_prepare_indices(struct brw_context *brw)
 {
   GLcontext *ctx = &brw->intel.ctx;
   struct intel_context *intel = &brw->intel;
-   GLuint ib_size = get_size(index_buffer->type) * index_buffer->count;
+   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
+   GLuint ib_size;
   dri_bo *bo;
-   struct gl_buffer_object *bufferobj = index_buffer->obj;
-   GLuint offset = (GLuint)index_buffer->ptr;
-   int ret;
+   struct gl_buffer_object *bufferobj;
+   GLuint offset;
+
+   if (index_buffer == NULL)
+      return;
+
+   ib_size = get_size(index_buffer->type) * index_buffer->count;
+   bufferobj = index_buffer->obj;

   /* Turn into a proper VBO:
    */
@ -541,6 +537,8 @@ int brw_prepare_indices( struct brw_context *brw,
       */
      dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
   } else {
+      offset = (GLuint)index_buffer->ptr;
+
      /* If the index buffer isn't aligned to its element size, we have to
       * rebase it into a temporary.
       */
@ -563,19 +561,22 @@ int brw_prepare_indices( struct brw_context *brw,
       }
   }

-   *bo_return = bo;
-   *offset_return = offset;
-   ret = dri_bufmgr_check_aperture_space(bo);
-   return ret;
+   dri_bo_unreference(brw->ib.bo);
+   brw->ib.bo = bo;
+   brw->ib.offset = offset;
 }

-void brw_emit_indices(struct brw_context *brw,
-                      const struct _mesa_index_buffer *index_buffer,
-                      dri_bo *bo,
-                      GLuint offset)
+static void brw_emit_indices(struct brw_context *brw)
 {
   struct intel_context *intel = &brw->intel;
-   GLuint ib_size = get_size(index_buffer->type) * index_buffer->count;
+   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
+   GLuint ib_size;
+
+   if (index_buffer == NULL)
+      return;
+
+   ib_size = get_size(index_buffer->type) * index_buffer->count;
+
   /* Emit the indexbuffer packet:
    */
   {
@ -591,13 +592,23 @@ void brw_emit_indices(struct brw_context *brw,

      BEGIN_BATCH(4, IGNORE_CLIPRECTS);
      OUT_BATCH( ib.header.dword );
-      OUT_RELOC( bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, offset);
-      OUT_RELOC( bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-		 offset + ib_size);
+      OUT_RELOC(brw->ib.bo,
+		I915_GEM_DOMAIN_VERTEX, 0,
+		brw->ib.offset);
+      OUT_RELOC(brw->ib.bo,
+		I915_GEM_DOMAIN_VERTEX, 0,
+		brw->ib.offset + ib_size);
      OUT_BATCH( 0 );
      ADVANCE_BATCH();
-
-      dri_bo_unreference(bo);
   }
 }

+const struct brw_tracked_state brw_indices = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH | BRW_NEW_INDICES,
+      .cache = 0,
+   },
+   .prepare = brw_prepare_indices,
+   .emit = brw_emit_indices,
+};
--- a/src/mesa/drivers/dri/i965/brw_fallback.c
+++ b/src/mesa/drivers/dri/i965/brw_fallback.c
@ -95,10 +95,9 @@ static GLboolean do_check_fallback(struct brw_context *brw)
   return GL_FALSE;
 }

-static int check_fallback(struct brw_context *brw)
+static void check_fallback(struct brw_context *brw)
 {
   brw->intel.Fallback = do_check_fallback(brw);
-   return 0;
 }

 const struct brw_tracked_state brw_check_fallback = {
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@ -162,10 +162,9 @@ static void populate_key( struct brw_context *brw,

 /* Calculate interpolants for triangle and line rasterization.
 */
-static int prepare_gs_prog( struct brw_context *brw )
+static void prepare_gs_prog(struct brw_context *brw)
 {
   struct brw_gs_prog_key key;
-   int ret = 0;
   /* Populate the key:
    */
   populate_key(brw, &key);
@ -183,11 +182,7 @@ static int prepare_gs_prog( struct brw_context *brw )
 					 &brw->gs.prog_data);
      if (brw->gs.prog_bo == NULL)
 	 compile_gs_prog( brw, &key );
-
-      ret |= dri_bufmgr_check_aperture_space(brw->gs.prog_bo);
   }
-
-   return ret;
 }


--- a/src/mesa/drivers/dri/i965/brw_gs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_state.c
@ -106,17 +106,17 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)

   if (key->prog_active) {
      /* Emit GS program relocation */
-      dri_emit_reloc(bo,
-		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-		     gs.thread0.grf_reg_count << 1,
-		     offsetof(struct brw_gs_unit_state, thread0),
-		     brw->gs.prog_bo);
+      intel_bo_emit_reloc(bo,
+			  I915_GEM_DOMAIN_INSTRUCTION, 0,
+			  gs.thread0.grf_reg_count << 1,
+			  offsetof(struct brw_gs_unit_state, thread0),
+			  brw->gs.prog_bo);
   }

   return bo;
 }

-static int prepare_gs_unit( struct brw_context *brw )
+static void prepare_gs_unit(struct brw_context *brw)
 {
   struct brw_gs_unit_key key;

@ -130,7 +130,6 @@ static int prepare_gs_unit( struct brw_context *brw )
   if (brw->gs.state_bo == NULL) {
      brw->gs.state_bo = gs_unit_create_from_key(brw, &key);
   }
-   return dri_bufmgr_check_aperture_space(brw->gs.state_bo);
 }

 const struct brw_tracked_state brw_gs_unit = {
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@ -81,6 +81,13 @@ const struct brw_tracked_state brw_blend_constant_color = {
 static void upload_binding_table_pointers(struct brw_context *brw)
 {
   struct intel_context *intel = &brw->intel;
+   dri_bo *aper_array[] = {
+      intel->batch->buf,
+      brw->wm.bind_bo,
+   };
+
+   if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)))
+      intel_batchbuffer_flush(intel->batch);

   BEGIN_BATCH(6, IGNORE_CLIPRECTS);
   OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
@ -88,7 +95,9 @@ static void upload_binding_table_pointers(struct brw_context *brw)
   OUT_BATCH(0); /* gs */
   OUT_BATCH(0); /* clip */
   OUT_BATCH(0); /* sf */
-   OUT_RELOC(brw->wm.bind_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+   OUT_RELOC(brw->wm.bind_bo,
+	     I915_GEM_DOMAIN_SAMPLER, 0,
+	     0);
   ADVANCE_BATCH();
 }

@ -114,49 +123,45 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )

   BEGIN_BATCH(7, IGNORE_CLIPRECTS);
   OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
-   OUT_RELOC(brw->vs.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+   OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
   if (brw->gs.prog_active)
-      OUT_RELOC(brw->gs.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 1);
+      OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
   else
      OUT_BATCH(0);
   if (!brw->metaops.active)
-      OUT_RELOC(brw->clip.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 1);
+      OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
   else
      OUT_BATCH(0);
-   OUT_RELOC(brw->sf.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
-   OUT_RELOC(brw->wm.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
-   OUT_RELOC(brw->cc.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+   OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
   ADVANCE_BATCH();

   brw->state.dirty.brw |= BRW_NEW_PSP;
 }

-#if 0
-/* Combined into brw_psp_urb_cbs */
-const struct brw_tracked_state brw_pipelined_state_pointers = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_METAOPS | BRW_NEW_BATCH,
-      .cache = (CACHE_NEW_VS_UNIT | 
-		CACHE_NEW_GS_UNIT | 
-		CACHE_NEW_GS_PROG | 
-		CACHE_NEW_CLIP_UNIT | 
-		CACHE_NEW_SF_UNIT | 
-		CACHE_NEW_WM_UNIT | 
-		CACHE_NEW_CC_UNIT)
-   },
-   .emit = upload_pipelined_state_pointers
-};
-#endif
-
 static void upload_psp_urb_cbs(struct brw_context *brw )
 {
+   struct intel_context *intel = &brw->intel;
+   /* The batch plus every unit-state buffer upload_pipelined_state_pointers() relocates. */
+   dri_bo *aper_array[] = {
+      intel->batch->buf,
+      brw->vs.state_bo,
+      brw->gs.state_bo,
+      brw->clip.state_bo,
+      brw->sf.state_bo,
+      brw->wm.state_bo,
+      brw->cc.state_bo,
+   };
+
+   if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)))
+      intel_batchbuffer_flush(intel->batch);
+
   upload_pipelined_state_pointers(brw);
   brw_upload_urb_fence(brw);
   brw_upload_constant_buffer_state(brw);
 }

-
 const struct brw_tracked_state brw_psp_urb_cbs = {
   .dirty = {
      .mesa = 0,
@ -172,22 +175,6 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
   .emit = upload_psp_urb_cbs,
 };

-/**
- * Upload the depthbuffer offset and format.
- *
- * We have to do this per state validation as we need to emit the relocation
- * in the batch buffer.
- */
-
-static int prepare_depthbuffer(struct brw_context *brw)
-{
-   struct intel_region *region = brw->state.depth_region;
-
-   if (!region || !region->buffer)
-      return 0;
-   return dri_bufmgr_check_aperture_space(region->buffer);
-}
-
 static void emit_depthbuffer(struct brw_context *brw)
 {
   struct intel_context *intel = &brw->intel;
@ -209,6 +196,10 @@ static void emit_depthbuffer(struct brw_context *brw)
      ADVANCE_BATCH();
   } else {
      unsigned int format;
+      dri_bo *aper_array[] = {
+	 intel->batch->buf,
+	 region->buffer
+      };

      switch (region->cpp) {
      case 2:
@ -225,15 +216,19 @@ static void emit_depthbuffer(struct brw_context *brw)
 	 return;
      }

+      if (dri_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)))
+	 intel_batchbuffer_flush(intel->batch);
+
      BEGIN_BATCH(len, IGNORE_CLIPRECTS);
      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH(((region->pitch * region->cpp) - 1) |
 		(format << 18) |
 		(BRW_TILEWALK_YMAJOR << 26) |
-		(region->tiled << 27) |
+		((region->tiling != I915_TILING_NONE) << 27) |
 		(BRW_SURFACE_2D << 29));
      OUT_RELOC(region->buffer,
-		DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0);
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		0);
      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
 		((region->pitch - 1) << 6) |
 		((region->height - 1) << 19));
@ -252,7 +247,6 @@ const struct brw_tracked_state brw_depthbuffer = {
      .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH,
      .cache = 0,
   },
-   .prepare = prepare_depthbuffer,
   .emit = emit_depthbuffer,
 };

@ -380,40 +374,6 @@ const struct brw_tracked_state brw_line_stipple = {
 };


-
-/***********************************************************************
- * Misc constant state packets
- */
-
-static void upload_pipe_control(struct brw_context *brw)
-{
-   struct brw_pipe_control pc;
-
-   return;
-
-   memset(&pc, 0, sizeof(pc));
-
-   pc.header.opcode = CMD_PIPE_CONTROL;
-   pc.header.length = sizeof(pc)/4 - 2;
-   pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE;
-
-   pc.header.instruction_state_cache_flush_enable = 1;
-
-   pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL;
-
-   BRW_BATCH_STRUCT(brw, &pc);
-}
-
-const struct brw_tracked_state brw_pipe_control = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_BATCH,
-      .cache = 0
-   },
-   .emit = upload_pipe_control
-};
-
-
 /***********************************************************************
 * Misc invarient state packets
 */
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@ -125,7 +125,7 @@ static void compile_sf_prog( struct brw_context *brw,

 /* Calculate interpolants for triangle and line rasterization.
 */
-static int upload_sf_prog( struct brw_context *brw )
+static void upload_sf_prog(struct brw_context *brw)
 {
   struct brw_sf_prog_key key;

@ -174,7 +174,6 @@ static int upload_sf_prog( struct brw_context *brw )
 				      &brw->sf.prog_data);
   if (brw->sf.prog_bo == NULL)
      compile_sf_prog( brw, &key );
-   return dri_bufmgr_check_aperture_space(brw->sf.prog_bo);
 }


--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@ -37,7 +37,7 @@
 #include "macros.h"
 #include "intel_fbo.h"

-static int upload_sf_vp(struct brw_context *brw)
+static void upload_sf_vp(struct brw_context *brw)
 {
   GLcontext *ctx = &brw->intel.ctx;
   const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
@ -98,8 +98,6 @@ static int upload_sf_vp(struct brw_context *brw)

   dri_bo_unreference(brw->sf.vp_bo);
   brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
-
-   return dri_bufmgr_check_aperture_space(brw->sf.vp_bo);
 }

 const struct brw_tracked_state brw_sf_vp = {
@ -253,27 +251,26 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
 			 NULL, NULL);

   /* Emit SF program relocation */
-   dri_emit_reloc(bo,
-		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-		  sf.thread0.grf_reg_count << 1,
-		  offsetof(struct brw_sf_unit_state, thread0),
-		  brw->sf.prog_bo);
+   intel_bo_emit_reloc(bo,
+		       I915_GEM_DOMAIN_INSTRUCTION, 0,
+		       sf.thread0.grf_reg_count << 1,
+		       offsetof(struct brw_sf_unit_state, thread0),
+		       brw->sf.prog_bo);

   /* Emit SF viewport relocation */
-   dri_emit_reloc(bo,
-		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-		  sf.sf5.front_winding | (sf.sf5.viewport_transform << 1),
-		  offsetof(struct brw_sf_unit_state, sf5),
-		  brw->sf.vp_bo);
+   intel_bo_emit_reloc(bo,
+		       I915_GEM_DOMAIN_INSTRUCTION, 0,
+		       sf.sf5.front_winding | (sf.sf5.viewport_transform << 1),
+		       offsetof(struct brw_sf_unit_state, sf5),
+		       brw->sf.vp_bo);

   return bo;
 }

-static int upload_sf_unit( struct brw_context *brw )
+static void upload_sf_unit( struct brw_context *brw )
 {
   struct brw_sf_unit_key key;
   dri_bo *reloc_bufs[2];
-   int ret = 0;

   sf_unit_populate_key(brw, &key);

@ -288,15 +285,6 @@ static int upload_sf_unit( struct brw_context *brw )
   if (brw->sf.state_bo == NULL) {
      brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs);
   }
-
-   if (reloc_bufs[0])
-     ret |= dri_bufmgr_check_aperture_space(reloc_bufs[0]);
-
-   if (reloc_bufs[1])
-     ret |= dri_bufmgr_check_aperture_space(reloc_bufs[1]);
-
-   ret |= dri_bufmgr_check_aperture_space(brw->sf.state_bo);
-   return ret;
 }

 const struct brw_tracked_state brw_sf_unit = {
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@ -80,6 +80,9 @@ const struct brw_tracked_state brw_pipe_control;
 const struct brw_tracked_state brw_clear_surface_cache;
 const struct brw_tracked_state brw_clear_batch_cache;

+const struct brw_tracked_state brw_indices;
+const struct brw_tracked_state brw_vertices;
+
 /***********************************************************************
 * brw_state_cache.c
 */
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@ -214,10 +214,7 @@ brw_upload_cache( struct brw_cache *cache,

   /* Create the buffer object to contain the data */
   bo = dri_bo_alloc(cache->brw->intel.bufmgr,
-		     cache->name[cache_id], data_size, 1 << 6,
-		     DRM_BO_FLAG_MEM_LOCAL |
-		     DRM_BO_FLAG_CACHED |
-		     DRM_BO_FLAG_CACHED_MAPPED);
+		     cache->name[cache_id], data_size, 1 << 6);


   /* Set up the memory containing the key, aux_data, and reloc_bufs */
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@ -80,7 +80,6 @@ const struct brw_tracked_state *atoms[] =
    */
   &brw_invarient_state,
   &brw_state_base_address,
-   &brw_pipe_control,

   &brw_binding_table_pointers,
   &brw_blend_constant_color,
@ -102,6 +101,8 @@ const struct brw_tracked_state *atoms[] =
   &brw_psp_urb_cbs,
 #endif

+   &brw_indices,
+   &brw_vertices,

   NULL,			/* brw_constant_buffer */
 };
@ -173,10 +174,12 @@ static void xor_states( struct brw_state_flags *result,
 /***********************************************************************
 * Emit all state:
 */
-int brw_validate_state( struct brw_context *brw )
+void brw_validate_state( struct brw_context *brw )
 {
+   struct intel_context *intel = &brw->intel;
   struct brw_state_flags *state = &brw->state.dirty;
-   GLuint i, ret, count;
+   GLuint i, count, pass = 0;
+   dri_bo *last_batch_bo = NULL;

   state->mesa |= brw->intel.NewGLState;
   brw->intel.NewGLState = 0;
@ -202,7 +205,7 @@ int brw_validate_state( struct brw_context *brw )
   if (state->mesa == 0 &&
       state->cache == 0 &&
       state->brw == 0)
-      return 0;
+      return;

   if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
      brw_clear_batch_cache_flush(brw);
@ -220,15 +223,23 @@ int brw_validate_state( struct brw_context *brw )

      if (check_state(state, &atom->dirty)) {
         if (atom->prepare) {
-            ret = atom->prepare(brw);
-            if (ret)
-               return ret;
+            atom->prepare(brw);
        }
      }
   }

   if (brw->intel.Fallback)
-      return 0;
+      return;
+
+   /* We're about to try to set up a coherent state in the batchbuffer for
+    * the emission of primitives.  If we exceed the aperture size in any of the
+    * emit() calls, we need to go back to square 1 and try setting up again.
+    */
+got_flushed:
+   dri_bo_unreference(last_batch_bo);
+   last_batch_bo = intel->batch->buf;
+   dri_bo_reference(last_batch_bo);
+   assert(pass++ <= 2);

   if (INTEL_DEBUG) {
      /* Debug version which enforces various sanity checks on the
@ -251,8 +262,11 @@ int brw_validate_state( struct brw_context *brw )
 	    break;

 	 if (check_state(state, &atom->dirty)) {
-	    if (atom->emit)
+	    if (atom->emit) {
 	       atom->emit( brw );
+	       if (intel->batch->buf != last_batch_bo)
+		  goto got_flushed;
+	    }
 	 }

 	 accumulate_state(&examined, &atom->dirty);
@ -274,13 +288,17 @@ int brw_validate_state( struct brw_context *brw )
 	    break;

 	 if (check_state(state, &atom->dirty)) {
-	    if (atom->emit)
+	    if (atom->emit) {
 	       atom->emit( brw );
+	       if (intel->batch->buf != last_batch_bo)
+		  goto got_flushed;
+	    }
 	 }
      }
   }

+   dri_bo_unreference(last_batch_bo);
+
   if (!brw->intel.Fallback)
      memset(state, 0, sizeof(*state));
-   return 0;
 }
--- a/src/mesa/drivers/dri/i965/brw_urb.c
+++ b/src/mesa/drivers/dri/i965/brw_urb.c
@ -74,7 +74,7 @@ static GLboolean check_urb_layout( struct brw_context *brw )
 /* Most minimal update, forces re-emit of URB fence packet after GS
 * unit turned on/off.
 */
-static int recalculate_urb_fence( struct brw_context *brw )
+static void recalculate_urb_fence( struct brw_context *brw )
 {
   GLuint csize = brw->curbe.total_size;
   GLuint vsize = brw->vs.prog_data->urb_entry_size;
@ -142,7 +142,6 @@ static int recalculate_urb_fence( struct brw_context *brw )
      
      brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
   }
-   return 0;
 }


@ -187,15 +186,3 @@ void brw_upload_urb_fence(struct brw_context *brw)

   BRW_BATCH_STRUCT(brw, &uf);
 }
-
-
-#if 0
-const struct brw_tracked_state brw_urb_fence = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_URB_FENCE | BRW_NEW_PSP,
-      .cache = 0
-   },
-   .update = brw_upload_urb_fence
-};
-#endif
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@ -83,7 +83,7 @@ static void do_vs_prog( struct brw_context *brw,
 }


-static int brw_upload_vs_prog( struct brw_context *brw )
+static void brw_upload_vs_prog(struct brw_context *brw)
 {
   struct brw_vs_prog_key key;
   struct brw_vertex_program *vp = 
@ -115,7 +115,6 @@ static int brw_upload_vs_prog( struct brw_context *brw )
 				      &brw->vs.prog_data);
   if (brw->vs.prog_bo == NULL)
      do_vs_prog(brw, vp, &key);
-   return dri_bufmgr_check_aperture_space(brw->vs.prog_bo);
 }


--- a/src/mesa/drivers/dri/i965/brw_vs_constval.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c
@ -166,7 +166,7 @@ static GLuint get_input_size(struct brw_context *brw,
 /* Calculate sizes of vertex program outputs.  Size is the largest
 * component index which might vary from [0,0,0,1]
 */
-static int calc_wm_input_sizes( struct brw_context *brw )
+static void calc_wm_input_sizes( struct brw_context *brw )
 {
   /* BRW_NEW_VERTEX_PROGRAM */
   struct brw_vertex_program *vp = 
@ -210,7 +210,6 @@ static int calc_wm_input_sizes( struct brw_context *brw )
      memcpy(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks));
      brw->state.dirty.brw |= BRW_NEW_WM_INPUT_DIMENSIONS;
   }
-   return 0;
 }

 const struct brw_tracked_state brw_wm_input_sizes = {
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@ -115,16 +115,16 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
 			 NULL, NULL);

   /* Emit VS program relocation */
-   dri_emit_reloc(bo,
-		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-		  vs.thread0.grf_reg_count << 1,
-		  offsetof(struct brw_vs_unit_state, thread0),
-		  brw->vs.prog_bo);
+   intel_bo_emit_reloc(bo,
+		       I915_GEM_DOMAIN_INSTRUCTION, 0,
+		       vs.thread0.grf_reg_count << 1,
+		       offsetof(struct brw_vs_unit_state, thread0),
+		       brw->vs.prog_bo);

   return bo;
 }

-static int prepare_vs_unit( struct brw_context *brw )
+static void prepare_vs_unit(struct brw_context *brw)
 {
   struct brw_vs_unit_key key;

@ -138,7 +138,6 @@ static int prepare_vs_unit( struct brw_context *brw )
   if (brw->vs.state_bo == NULL) {
      brw->vs.state_bo = vs_unit_create_from_key(brw, &key);
   }
-   return dri_bufmgr_check_aperture_space(brw->vs.state_bo);
 }

 const struct brw_tracked_state brw_vs_unit = {
--- a/src/mesa/drivers/dri/i965/brw_vs_tnl.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_tnl.c
@ -1581,7 +1581,7 @@ static GLuint hash_key( struct state_key *key )
   return hash;
 }

-static int prepare_tnl_program( struct brw_context *brw )
+static void prepare_tnl_program( struct brw_context *brw )
 {
   GLcontext *ctx = &brw->intel.ctx;
   struct state_key key;
@ -1590,7 +1590,7 @@ static int prepare_tnl_program( struct brw_context *brw )

   /* _NEW_PROGRAM */
   if (brw->attribs.VertexProgram->_Current) 
-      return 0;
+      return;
      
   /* Grab all the relevent state and put it in a single structure:
    */
@ -1623,7 +1623,7 @@ static int prepare_tnl_program( struct brw_context *brw )

   if (old != brw->tnl_program)
      brw->state.dirty.brw |= BRW_NEW_TNL_PROGRAM;
-   return 0;
+   return;
 }

 /* Note: See brw_draw.c - the vertex program must not rely on
@ -1649,7 +1649,7 @@ const struct brw_tracked_state brw_tnl_vertprog = {



-static int prepare_active_vertprog( struct brw_context *brw )
+static void prepare_active_vertprog( struct brw_context *brw )
 {
   const struct gl_vertex_program *prev = brw->vertex_program;

@ -1664,8 +1664,6 @@ static int prepare_active_vertprog( struct brw_context *brw )

   if (brw->vertex_program != prev) 
      brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
-
-   return 0;
 }


--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@ -325,7 +325,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
 }


-static int brw_prepare_wm_prog( struct brw_context *brw )
+static void brw_prepare_wm_prog(struct brw_context *brw)
 {
   struct brw_wm_prog_key key;
   struct brw_fragment_program *fp = (struct brw_fragment_program *)
@ -342,8 +342,6 @@ static int brw_prepare_wm_prog( struct brw_context *brw )
 				      &brw->wm.prog_data);
   if (brw->wm.prog_bo == NULL)
      do_wm_prog(brw, fp, &key);
-
-   return dri_bufmgr_check_aperture_space(brw->wm.prog_bo);
 }


--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@ -255,11 +255,10 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
 * complicates various things.  However, this is still too confusing -
 * FIXME: simplify all the different new texture state flags.
 */
-static int upload_wm_samplers( struct brw_context *brw )
+static void upload_wm_samplers( struct brw_context *brw )
 {
   struct wm_sampler_key key;
   int i;
-   int ret = 0;

   brw_wm_sampler_populate_key(brw, &key);

@ -271,7 +270,7 @@ static int upload_wm_samplers( struct brw_context *brw )
   dri_bo_unreference(brw->wm.sampler_bo);
   brw->wm.sampler_bo = NULL;
   if (brw->wm.sampler_count == 0)
-      return 0;
+      return;

   brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER,
 					 &key, sizeof(key),
@ -304,19 +303,14 @@ static int upload_wm_samplers( struct brw_context *brw )
 	 if (!brw->attribs.Texture->Unit[i]._ReallyEnabled)
 	    continue;

-	 ret |= dri_bufmgr_check_aperture_space(brw->wm.sdc_bo[i]);
-	 dri_emit_reloc(brw->wm.sampler_bo,
-			DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-			0,
-			i * sizeof(struct brw_sampler_state) +
-			offsetof(struct brw_sampler_state, ss2),
-			brw->wm.sdc_bo[i]);
+	 intel_bo_emit_reloc(brw->wm.sampler_bo,
+			     I915_GEM_DOMAIN_INSTRUCTION, 0,
+			     0,
+			     i * sizeof(struct brw_sampler_state) +
+			     offsetof(struct brw_sampler_state, ss2),
+			     brw->wm.sdc_bo[i]);
      }
   }
-
-   ret |= dri_bufmgr_check_aperture_space(brw->wm.sampler_bo);
-   return ret;
-
 }

 const struct brw_tracked_state brw_wm_samplers = {
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@ -199,40 +199,39 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 			 NULL, NULL);

   /* Emit WM program relocation */
-   dri_emit_reloc(bo,
-		  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-		  wm.thread0.grf_reg_count << 1,
-		  offsetof(struct brw_wm_unit_state, thread0),
-		  brw->wm.prog_bo);
+   intel_bo_emit_reloc(bo,
+		       I915_GEM_DOMAIN_INSTRUCTION, 0,
+		       wm.thread0.grf_reg_count << 1,
+		       offsetof(struct brw_wm_unit_state, thread0),
+		       brw->wm.prog_bo);

   /* Emit scratch space relocation */
   if (key->total_scratch != 0) {
-      dri_emit_reloc(bo,
-		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
-		     wm.thread2.per_thread_scratch_space,
-		     offsetof(struct brw_wm_unit_state, thread2),
-		     brw->wm.scratch_buffer);
+      intel_bo_emit_reloc(bo,
+			  0, 0,
+			  wm.thread2.per_thread_scratch_space,
+			  offsetof(struct brw_wm_unit_state, thread2),
+			  brw->wm.scratch_buffer);
   }

   /* Emit sampler state relocation */
   if (key->sampler_count != 0) {
-      dri_emit_reloc(bo,
-		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-		     wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
-		     offsetof(struct brw_wm_unit_state, wm4),
-		     brw->wm.sampler_bo);
+      intel_bo_emit_reloc(bo,
+			  I915_GEM_DOMAIN_INSTRUCTION, 0,
+			  wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
+			  offsetof(struct brw_wm_unit_state, wm4),
+			  brw->wm.sampler_bo);
   }

   return bo;
 }


-static int upload_wm_unit( struct brw_context *brw )
+static void upload_wm_unit( struct brw_context *brw )
 {
   struct intel_context *intel = &brw->intel;
   struct brw_wm_unit_key key;
   dri_bo *reloc_bufs[3];
-   int ret = 0, i;
   wm_unit_populate_key(brw, &key);

   /* Allocate the necessary scratch space if we haven't already.  Don't
@ -251,7 +250,7 @@ static int upload_wm_unit( struct brw_context *brw )
 	 brw->wm.scratch_buffer = dri_bo_alloc(intel->bufmgr,
 					       "wm scratch",
 					       total,
-					       4096, DRM_BO_FLAG_MEM_TT);
+					       4096);
      }
   }

@ -267,12 +266,6 @@ static int upload_wm_unit( struct brw_context *brw )
   if (brw->wm.state_bo == NULL) {
      brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs);
   }
-
-   for (i = 0; i < 3; i++)
-     if (reloc_bufs[i])
-       ret |= dri_bufmgr_check_aperture_space(reloc_bufs[i]);
-   ret |= dri_bufmgr_check_aperture_space(brw->wm.state_bo);
-   return ret;
 }

 const struct brw_tracked_state brw_wm_unit = {
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@ -154,10 +154,28 @@ struct brw_wm_surface_key {
   GLint first_level, last_level;
   GLint width, height, depth;
   GLint pitch, cpp;
-   GLboolean tiled;
-   GLuint offset;
+   uint32_t tiling;
 };

+/**
+ * Program the tiling fields of a surface state from a tiling mode.
+ *
+ * \param surf    surface state whose ss3.tiled_surface and ss3.tile_walk
+ *                fields are written.
+ * \param tiling  tiling mode; I915_TILING_NONE, I915_TILING_X and
+ *                I915_TILING_Y are the values handled.
+ *
+ * There is no default case: any other tiling value leaves both ss3 fields
+ * exactly as the caller passed them in.
+ */
+static void
+brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
+{
+   switch (tiling) {
+   case I915_TILING_NONE:
+      /* Untiled: clear the tiled flag and the walk field. */
+      surf->ss3.tiled_surface = 0;
+      surf->ss3.tile_walk = 0;
+      break;
+   case I915_TILING_X:
+      /* X tiling: tiled, X-major walk. */
+      surf->ss3.tiled_surface = 1;
+      surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+      break;
+   case I915_TILING_Y:
+      /* Y tiling: tiled, Y-major walk. */
+      surf->ss3.tiled_surface = 1;
+      surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR;
+      break;
+   }
+}
+
 static dri_bo *
 brw_create_texture_surface( struct brw_context *brw,
 			    struct brw_wm_surface_key *key )
@ -169,32 +187,18 @@ brw_create_texture_surface( struct brw_context *brw,

   surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
   surf.ss0.surface_type = translate_tex_target(key->target);
-
-   if (key->bo) 
-      surf.ss0.surface_format = translate_tex_format(key->format, key->depthmode);
-   else {
-     switch(key->depth) {
-     case 32: surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break;
-     default:
-     case 24: surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM; break;
-     case 16: surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; break;
-     }
-   }
+   surf.ss0.surface_format = translate_tex_format(key->format, key->depthmode);

   /* This is ok for all textures with channel width 8bit or less:
    */
 /*    surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
-   if (key->bo)
-     surf.ss1.base_addr = key->bo->offset; /* reloc */
-   else
-     surf.ss1.base_addr = key->offset;
+
+   surf.ss1.base_addr = key->bo->offset; /* reloc */

   surf.ss2.mip_count = key->last_level - key->first_level;
   surf.ss2.width = key->width - 1;
   surf.ss2.height = key->height - 1;
-
-   surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
-   surf.ss3.tiled_surface = key->tiled;
+   brw_set_surface_tiling(&surf, key->tiling);
   surf.ss3.pitch = (key->pitch * key->cpp) - 1;
   surf.ss3.depth = key->depth - 1;

@ -211,21 +215,21 @@ brw_create_texture_surface( struct brw_context *brw,

   bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE,
 			 key, sizeof(*key),
-			 &key->bo, key->bo ? 1 : 0,
+			 &key->bo, 1,
 			 &surf, sizeof(surf),
 			 NULL, NULL);
-   if (key->bo) {
-      /* Emit relocation to surface contents */
-      dri_emit_reloc(bo,
-		     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-		     0,
-		     offsetof(struct brw_surface_state, ss1),
-		     key->bo);
-   }
+
+   /* Emit relocation to surface contents */
+   intel_bo_emit_reloc(bo,
+		       I915_GEM_DOMAIN_SAMPLER, 0,
+		       0,
+		       offsetof(struct brw_surface_state, ss1),
+		       key->bo);
+
   return bo;
 }

-static int
+static void
 brw_update_texture_surface( GLcontext *ctx, GLuint unit )
 {
   struct brw_context *brw = brw_context(ctx);
@ -233,44 +237,29 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
   struct brw_wm_surface_key key;
-   int ret = 0;

   memset(&key, 0, sizeof(key));
-
-   if (intelObj->imageOverride) {
-      key.pitch = intelObj->pitchOverride / intelObj->mt->cpp;
-      key.depth = intelObj->depthOverride;
-      key.bo = NULL;
-      key.offset = intelObj->textureOffset;
-   } else {
-      key.format = firstImage->TexFormat->MesaFormat;
-      key.pitch = intelObj->mt->pitch;
-      key.depth = firstImage->Depth;
-      key.bo = intelObj->mt->region->buffer;
-      key.offset = 0;
-      ret |= dri_bufmgr_check_aperture_space(key.bo);
-   }
-
   key.target = tObj->Target;
   key.depthmode = tObj->DepthMode;
+   key.format = firstImage->TexFormat->MesaFormat;
+   key.bo = intelObj->mt->region->buffer;
   key.first_level = intelObj->firstLevel;
   key.last_level = intelObj->lastLevel;
   key.width = firstImage->Width;
   key.height = firstImage->Height;
+   key.pitch = intelObj->mt->pitch;
   key.cpp = intelObj->mt->cpp;
-   key.tiled = intelObj->mt->region->tiled;
+   key.depth = firstImage->Depth;
+   key.tiling = intelObj->mt->region->tiling;

   dri_bo_unreference(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]);
   brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_search_cache(&brw->cache, BRW_SS_SURFACE,
-							       &key, sizeof(key),
-							       &key.bo, key.bo ? 1 : 0,
-							       NULL);
+						&key, sizeof(key),
+						&key.bo, 1,
+						NULL);
   if (brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] == NULL) {
      brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_create_texture_surface(brw, &key);
   }
-
-   ret |= dri_bufmgr_check_aperture_space(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]);
-   return ret;
 }

 /**
@ -278,18 +267,18 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffer support.
 */
-static int
+static void
 brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
 			  unsigned int unit, GLboolean cached)
 {
   dri_bo *region_bo = NULL;
-   int ret = 0;
   struct {
      unsigned int surface_type;
      unsigned int surface_format;
      unsigned int width, height, cpp;
      GLubyte color_mask[4];
-      GLboolean tiled, color_blend;
+      GLboolean color_blend;
+      uint32_t tiling;
   } key;

   memset(&key, 0, sizeof(key));
@ -302,16 +291,14 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
 	 key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
      else
 	 key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
-      key.tiled = region->tiled;
+      key.tiling = region->tiling;
      key.width = region->pitch; /* XXX: not really! */
      key.height = region->height;
      key.cpp = region->cpp;
-
-      ret |= dri_bufmgr_check_aperture_space(region->buffer);
   } else {
      key.surface_type = BRW_SURFACE_NULL;
      key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-      key.tiled = 0;
+      key.tiling = 0;
      key.width = 1;
      key.height = 1;
      key.cpp = 4;
@ -341,8 +328,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,

      surf.ss2.width = key.width - 1;
      surf.ss2.height = key.height - 1;
-      surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
-      surf.ss3.tiled_surface = key.tiled;
+      brw_set_surface_tiling(&surf, key.tiling);
      surf.ss3.pitch = (key.width * key.cpp) - 1;

      /* _NEW_COLOR */
@ -359,19 +345,19 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region,
 					       &surf, sizeof(surf),
 					       NULL, NULL);
      if (region_bo != NULL) {
-	 dri_emit_reloc(brw->wm.surf_bo[unit],
-			DRM_BO_FLAG_MEM_TT |
-			DRM_BO_FLAG_READ |
-			DRM_BO_FLAG_WRITE,
-			0,
-			offsetof(struct brw_surface_state, ss1),
-			region_bo);
+	 /* We might sample from it, and we might render to it, so flag
+	  * them both.  We might be able to figure out from other state
+	  * a more restrictive relocation to emit.
+	  */
+	 intel_bo_emit_reloc(brw->wm.surf_bo[unit],
+			     I915_GEM_DOMAIN_RENDER |
+			     I915_GEM_DOMAIN_SAMPLER,
+			     I915_GEM_DOMAIN_RENDER,
+			     0,
+			     offsetof(struct brw_surface_state, ss1),
+			     region_bo);
      }
   }
-
-   ret |= dri_bufmgr_check_aperture_space(brw->wm.surf_bo[unit]);
-
-   return ret;
 }


@ -409,13 +395,11 @@ brw_wm_get_binding_table(struct brw_context *brw)
      /* Emit binding table relocations to surface state */
      for (i = 0; i < BRW_WM_MAX_SURF; i++) {
 	 if (brw->wm.surf_bo[i] != NULL) {
-	    dri_emit_reloc(bind_bo,
-			   DRM_BO_FLAG_MEM_TT |
-			   DRM_BO_FLAG_READ |
-			   DRM_BO_FLAG_WRITE,
-			   0,
-			   i * sizeof(GLuint),
-			   brw->wm.surf_bo[i]);
+	    intel_bo_emit_reloc(bind_bo,
+				I915_GEM_DOMAIN_INSTRUCTION, 0,
+				0,
+				i * sizeof(GLuint),
+				brw->wm.surf_bo[i]);
 	 }
      }

@ -425,23 +409,19 @@ brw_wm_get_binding_table(struct brw_context *brw)
   return bind_bo;
 }

-static int prepare_wm_surfaces(struct brw_context *brw )
+static void prepare_wm_surfaces(struct brw_context *brw )
 {
   GLcontext *ctx = &brw->intel.ctx;
   struct intel_context *intel = &brw->intel;
-   GLuint i, ret;
+   GLuint i;

   if (brw->state.nr_draw_regions  > 1) {
      for (i = 0; i < brw->state.nr_draw_regions; i++) {
-         ret = brw_update_region_surface(brw, brw->state.draw_regions[i], i,
-                                         GL_FALSE);
-         if (ret)
-            return ret;
+         brw_update_region_surface(brw, brw->state.draw_regions[i], i,
+				   GL_FALSE);
      }
   }else {
-      ret = brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE);
-      if (ret)
-         return ret;
+      brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE);
   }

   brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
@ -457,11 +437,8 @@ static int prepare_wm_surfaces(struct brw_context *brw )
            dri_bo_reference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]);
            brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1;
         } else {
-            ret = brw_update_texture_surface(ctx, i);
+            brw_update_texture_surface(ctx, i);
            brw->wm.nr_surfaces = i + MAX_DRAW_BUFFERS + 1;
-
-            if (ret)
-               return ret;
         }
      } else {
         dri_bo_unreference(brw->wm.surf_bo[i+MAX_DRAW_BUFFERS]);
@ -472,8 +449,6 @@ static int prepare_wm_surfaces(struct brw_context *brw )

   dri_bo_unreference(brw->wm.bind_bo);
   brw->wm.bind_bo = brw_wm_get_binding_table(brw);
-
-   return dri_bufmgr_check_aperture_space(brw->wm.bind_bo);
 }


--- a/src/mesa/drivers/dri/i965/intel_bufmgr_ttm.c
+++ b/src/mesa/drivers/dri/i965/intel_bufmgr_ttm.c
@ -1 +0,0 @@
-../intel/intel_bufmgr_ttm.c
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@ -29,6 +29,7 @@
 #include "intel_ioctl.h"
 #include "intel_decode.h"
 #include "intel_reg.h"
+#include "intel_bufmgr.h"

 /* Relocations in kernel space:
 *    - pass dma buffer seperately
@ -78,19 +79,21 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
      batch->buf = NULL;
   }

+   if (!batch->buffer && intel->ttm == GL_TRUE)
+      batch->buffer = malloc (intel->maxBatchSize);
+
   batch->buf = dri_bo_alloc(intel->bufmgr, "batchbuffer",
-			     intel->maxBatchSize, 4096,
-			     DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED);
-   dri_bo_map(batch->buf, GL_TRUE);
-   batch->map = batch->buf->virtual;
+			     intel->maxBatchSize, 4096);
+   if (batch->buffer)
+      batch->map = batch->buffer;
+   else {
+      dri_bo_map(batch->buf, GL_TRUE);
+      batch->map = batch->buf->virtual;
+   }
   batch->size = intel->maxBatchSize;
   batch->ptr = batch->map;
   batch->dirty_state = ~0;
   batch->cliprect_mode = IGNORE_CLIPRECTS;
-
-   /* account batchbuffer in aperture */
-   dri_bufmgr_check_aperture_space(batch->buf);
-
 }

 struct intel_batchbuffer *
@ -99,7 +102,6 @@ intel_batchbuffer_alloc(struct intel_context *intel)
   struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1);

   batch->intel = intel;
-   batch->last_fence = NULL;
   intel_batchbuffer_reset(batch);

   return batch;
@ -108,14 +110,13 @@ intel_batchbuffer_alloc(struct intel_context *intel)
 void
 intel_batchbuffer_free(struct intel_batchbuffer *batch)
 {
-   if (batch->last_fence) {
-      dri_fence_wait(batch->last_fence);
-      dri_fence_unreference(batch->last_fence);
-      batch->last_fence = NULL;
-   }
-   if (batch->map) {
-      dri_bo_unmap(batch->buf);
-      batch->map = NULL;
+   if (batch->buffer)
+      free (batch->buffer);
+   else {
+      if (batch->map) {
+	 dri_bo_unmap(batch->buf);
+	 batch->map = NULL;
+      }
   }
   dri_bo_unreference(batch->buf);
   batch->buf = NULL;
@ -131,11 +132,12 @@ do_flush_locked(struct intel_batchbuffer *batch,
 		GLuint used, GLboolean allow_unlock)
 {
   struct intel_context *intel = batch->intel;
-   void *start;
-   GLuint count;
+   int ret = 0;

-   dri_bo_unmap(batch->buf);
-   start = dri_process_relocs(batch->buf, &count);
+   if (batch->buffer)
+      dri_bo_subdata (batch->buf, 0, used, batch->buffer);
+   else
+      dri_bo_unmap(batch->buf);

   batch->map = NULL;
   batch->ptr = NULL;
@ -148,21 +150,25 @@ do_flush_locked(struct intel_batchbuffer *batch,
   if (!(intel->numClipRects == 0 &&
 	 batch->cliprect_mode == LOOP_CLIPRECTS)) {
      if (intel->ttm == GL_TRUE) {
-	 intel_exec_ioctl(batch->intel,
-			  used,
-			  batch->cliprect_mode != LOOP_CLIPRECTS,
-			  allow_unlock,
-			  start, count, &batch->last_fence);
+	 struct drm_i915_gem_execbuffer *execbuf;
+
+	 execbuf = dri_process_relocs(batch->buf);
+	 ret = intel_exec_ioctl(batch->intel,
+				used,
+				batch->cliprect_mode != LOOP_CLIPRECTS,
+				allow_unlock,
+				execbuf);
      } else {
-	 intel_batch_ioctl(batch->intel,
-			   batch->buf->offset,
-			   used,
-			   batch->cliprect_mode != LOOP_CLIPRECTS,
-			   allow_unlock);
+	 dri_process_relocs(batch->buf);
+	 ret = intel_batch_ioctl(batch->intel,
+				 batch->buf->offset,
+				 used,
+				 batch->cliprect_mode != LOOP_CLIPRECTS,
+				 allow_unlock);
      }
   }
-      
-   dri_post_submit(batch->buf, &batch->last_fence);
+
+   dri_post_submit(batch->buf);

   if (intel->numClipRects == 0 &&
       batch->cliprect_mode == LOOP_CLIPRECTS) {
@ -187,6 +193,10 @@ do_flush_locked(struct intel_batchbuffer *batch,
 	 intel->vtbl.debug_batch(intel);
   }

+   if (ret != 0) {
+      UNLOCK_HARDWARE(intel);
+      exit(1);
+   }
   intel->vtbl.new_batch(intel);
 }

@ -204,21 +214,27 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
   if (INTEL_DEBUG & DEBUG_BATCH)
      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
 	      used);
-   /* Add the MI_BATCH_BUFFER_END.  Always add an MI_FLUSH - this is a
-    * performance drain that we would like to avoid.
-    */
-   if (used & 4) {
-      ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
-      ((int *) batch->ptr)[1] = 0;
-      ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END;
-      used += 12;
+
+   /* Emit a flush if the bufmgr doesn't do it for us. */
+   if (!intel->ttm) {
+      *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd();
+      batch->ptr += 4;
+      used = batch->ptr - batch->map;
   }
-   else {
-      ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
-      ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END;
-      used += 8;
+
+   /* Round batchbuffer usage to 2 DWORDs. */
+
+   if ((used & 4) == 0) {
+      *(GLuint *) (batch->ptr) = 0; /* noop */
+      batch->ptr += 4;
+      used = batch->ptr - batch->map;
   }

+   /* Mark the end of the buffer. */
+   *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* end of batch */
+   batch->ptr += 4;
+   used = batch->ptr - batch->map;
+
   /* Workaround for recursive batchbuffer flushing: If the window is
    * moved, we can get into a case where we try to flush during a
    * flush.  What happens is that when we try to grab the lock for
@ -230,6 +246,9 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
    * avoid that in the first place. */
   batch->ptr = batch->map;

+   if (intel->vtbl.finish_batch)
+      intel->vtbl.finish_batch(intel);
+
   /* TODO: Just pass the relocation list and dma buffer up to the
    * kernel.
    */
@ -242,9 +261,13 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
      UNLOCK_HARDWARE(intel);

   if (INTEL_DEBUG & DEBUG_SYNC) {
+      int irq;
+
      fprintf(stderr, "waiting for idle\n");
-      if (batch->last_fence != NULL)
-	 dri_fence_wait(batch->last_fence);
+      LOCK_HARDWARE(intel);
+      irq = intelEmitIrqLocked(intel);
+      UNLOCK_HARDWARE(intel);
+      intelWaitIrq(intel, irq);
   }

   /* Reset the buffer:
@ -252,25 +275,22 @@ _intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
   intel_batchbuffer_reset(batch);
 }

-void
-intel_batchbuffer_finish(struct intel_batchbuffer *batch)
-{
-   intel_batchbuffer_flush(batch);
-   if (batch->last_fence != NULL)
-      dri_fence_wait(batch->last_fence);
-}
-

 /*  This is the only way buffers get added to the validate list.
 */
 GLboolean
 intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
                             dri_bo *buffer,
-                             GLuint flags, GLuint delta)
+                             uint32_t read_domains, uint32_t write_domain,
+			     uint32_t delta)
 {
   int ret;

-   ret = dri_emit_reloc(batch->buf, flags, delta, batch->ptr - batch->map, buffer);
+   if (batch->ptr - batch->map > batch->buf->size)
+    _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n",
+		  batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
+   ret = intel_bo_emit_reloc(batch->buf, read_domains, write_domain,
+			     delta, batch->ptr - batch->map, buffer);

   /*
    * Using the old buffer offset, write in what the right data would be, in case
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
@ -4,6 +4,7 @@
 #include "mtypes.h"

 #include "dri_bufmgr.h"
+#include "intel_reg.h"

 struct intel_context;

@ -40,7 +41,8 @@ struct intel_batchbuffer
   struct intel_context *intel;

   dri_bo *buf;
-   dri_fence *last_fence;
+
+   GLubyte *buffer;

   GLubyte *map;
   GLubyte *ptr;
@ -58,8 +60,6 @@ struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context
 void intel_batchbuffer_free(struct intel_batchbuffer *batch);


-void intel_batchbuffer_finish(struct intel_batchbuffer *batch);
-
 void _intel_batchbuffer_flush(struct intel_batchbuffer *batch,
 			      const char *file, int line);

@ -82,14 +82,16 @@ void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,

 GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
                                       dri_bo *buffer,
-                                       GLuint flags, GLuint offset);
+				       uint32_t read_domains,
+				       uint32_t write_domain,
+				       uint32_t offset);

 /* Inline functions - might actually be better off with these
 * non-inlined.  Certainly better off switching all command packets to
 * be passed as structs rather than dwords, but that's a little bit of
 * work...
 */
-static INLINE GLuint
+static INLINE GLint
 intel_batchbuffer_space(struct intel_batchbuffer *batch)
 {
   return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
@ -136,12 +138,20 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,

 #define OUT_BATCH(d)  intel_batchbuffer_emit_dword(intel->batch, d)

-#define OUT_RELOC(buf, cliprect_mode, delta) do { 			\
+#define OUT_RELOC(buf, read_domains, write_domain, delta) do {		\
   assert((delta) >= 0);						\
-   intel_batchbuffer_emit_reloc(intel->batch, buf, cliprect_mode, delta); \
+   intel_batchbuffer_emit_reloc(intel->batch, buf,			\
+				read_domains, write_domain, delta);	\
 } while (0)

 #define ADVANCE_BATCH() do { } while(0)


+/**
+ * Append a single MI_FLUSH command dword to the batch.
+ *
+ * Requests 4 bytes of batch space under IGNORE_CLIPRECTS before writing
+ * the dword.
+ * NOTE(review): presumably intel_batchbuffer_require_space() flushes the
+ * batch first when it is full -- confirm against its definition.
+ */
+static INLINE void
+intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
+{
+   intel_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS);
+   intel_batchbuffer_emit_dword(batch, MI_FLUSH);
+}
+
 #endif
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@ -54,7 +54,6 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,

   struct intel_context *intel;
   const intelScreenPrivate *intelScreen;
-   int ret;

   DBG("%s\n", __FUNCTION__);

@ -66,14 +65,6 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,

   intelScreen = intel->intelScreen;

-   if (intel->last_swap_fence) {
-      dri_fence_wait(intel->last_swap_fence);
-      dri_fence_unreference(intel->last_swap_fence);
-      intel->last_swap_fence = NULL;
-   }
-   intel->last_swap_fence = intel->first_swap_fence;
-   intel->first_swap_fence = NULL;
-
   /* The LOCK_HARDWARE is required for the cliprects.  Buffer offsets
    * should work regardless.
    */
@ -89,6 +80,7 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
      unsigned short src_x, src_y;
      int BR13, CMD;
      int i;
+      dri_bo *aper_array[3];

      src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT);
      dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT);
@ -114,26 +106,28 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
      }

 #ifndef I915
-      if (src->tiled) {
+      if (src->tiling != I915_TILING_NONE) {
 	 CMD |= XY_SRC_TILED;
 	 src_pitch /= 4;
      }
-      if (dst->tiled) {
+      if (dst->tiling != I915_TILING_NONE) {
 	 CMD |= XY_DST_TILED;
 	 dst_pitch /= 4;
      }
 #endif
      /* do space/cliprects check before going any further */
-      intel_batchbuffer_require_space(intel->batch, 8 * 4, REFERENCES_CLIPRECTS);
+      intel_batchbuffer_require_space(intel->batch, 8 * 4,
+				      REFERENCES_CLIPRECTS);
   again:
-      ret = dri_bufmgr_check_aperture_space(dst->buffer);
-      ret |= dri_bufmgr_check_aperture_space(src->buffer);
-      
-      if (ret) {
+      aper_array[0] = intel->batch->buf;
+      aper_array[1] = dst->buffer;
+      aper_array[2] = src->buffer;
+
+      if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
 	intel_batchbuffer_flush(intel->batch);
 	goto again;
      }
-      
+
      for (i = 0; i < nbox; i++, pbox++) {
 	 drm_clip_rect_t box = *pbox;

@ -157,19 +151,22 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
 	 OUT_BATCH((box.y1 << 16) | box.x1);
 	 OUT_BATCH((box.y2 << 16) | box.x2);

-	 OUT_RELOC(dst->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, 0);
+	 OUT_RELOC(dst->buffer,
+		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		   0);
 	 OUT_BATCH((src_y << 16) | src_x);
 	 OUT_BATCH(src_pitch);
-	 OUT_RELOC(src->buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0);
+	 OUT_RELOC(src->buffer,
+		   I915_GEM_DOMAIN_RENDER, 0,
+		   0);
 	 ADVANCE_BATCH();
      }

-      if (intel->first_swap_fence)
-	 dri_fence_unreference(intel->first_swap_fence);
+      /* Flush the rendering and the batch so that the results all land on the
+       * screen in a timely fashion.
+       */
+      intel_batchbuffer_emit_mi_flush(intel->batch);
      intel_batchbuffer_flush(intel->batch);
-      intel->first_swap_fence = intel->batch->last_fence;
-      if (intel->first_swap_fence)
-	 dri_fence_reference(intel->first_swap_fence);
   }

   UNLOCK_HARDWARE(intel);
@ -184,7 +181,7 @@ intelEmitFillBlit(struct intel_context *intel,
 		  GLshort dst_pitch,
 		  dri_bo *dst_buffer,
 		  GLuint dst_offset,
-		  GLboolean dst_tiled,
+		  uint32_t dst_tiling,
 		  GLshort x, GLshort y,
 		  GLshort w, GLshort h,
 		  GLuint color)
@ -209,7 +206,7 @@ intelEmitFillBlit(struct intel_context *intel,
      return;
   }
 #ifndef I915
-   if (dst_tiled) {
+   if (dst_tiling != I915_TILING_NONE) {
      CMD |= XY_DST_TILED;
      dst_pitch /= 4;
   }
@ -226,7 +223,9 @@ intelEmitFillBlit(struct intel_context *intel,
   OUT_BATCH(BR13 | dst_pitch);
   OUT_BATCH((y << 16) | x);
   OUT_BATCH(((y + h) << 16) | (x + w));
-   OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
+   OUT_RELOC(dst_buffer,
+	     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+	     dst_offset);
   OUT_BATCH(color);
   ADVANCE_BATCH();
 }
@ -263,11 +262,11 @@ intelEmitCopyBlit(struct intel_context *intel,
 		  GLshort src_pitch,
 		  dri_bo *src_buffer,
 		  GLuint src_offset,
-		  GLboolean src_tiled,
+		  uint32_t src_tiling,
 		  GLshort dst_pitch,
 		  dri_bo *dst_buffer,
 		  GLuint dst_offset,
-		  GLboolean dst_tiled,
+		  uint32_t dst_tiling,
 		  GLshort src_x, GLshort src_y,
 		  GLshort dst_x, GLshort dst_y,
 		  GLshort w, GLshort h,
@ -276,17 +275,19 @@ intelEmitCopyBlit(struct intel_context *intel,
   GLuint CMD, BR13;
   int dst_y2 = dst_y + h;
   int dst_x2 = dst_x + w;
-   int ret;
+   dri_bo *aper_array[3];
   BATCH_LOCALS;

   /* do space/cliprects check before going any further */
   intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS);
 again:
-   ret = dri_bufmgr_check_aperture_space(dst_buffer);
-   ret |= dri_bufmgr_check_aperture_space(src_buffer);
-   if (ret) {
-     intel_batchbuffer_flush(intel->batch);
-     goto again;
+   aper_array[0] = intel->batch->buf;
+   aper_array[1] = dst_buffer;
+   aper_array[2] = src_buffer;
+
+   if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) {
+      intel_batchbuffer_flush(intel->batch);
+      goto again;
   }

   DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
@ -315,11 +316,11 @@ intelEmitCopyBlit(struct intel_context *intel,
   }

 #ifndef I915
-   if (dst_tiled) {
+   if (dst_tiling != I915_TILING_NONE) {
      CMD |= XY_DST_TILED;
      dst_pitch /= 4;
   }
-   if (src_tiled) {
+   if (src_tiling != I915_TILING_NONE) {
      CMD |= XY_SRC_TILED;
      src_pitch /= 4;
   }
@ -345,11 +346,13 @@ intelEmitCopyBlit(struct intel_context *intel,
      OUT_BATCH(BR13 | dst_pitch);
      OUT_BATCH((dst_y << 16) | dst_x);
      OUT_BATCH((dst_y2 << 16) | dst_x2);
-      OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+      OUT_RELOC(dst_buffer,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
 		dst_offset);
      OUT_BATCH((src_y << 16) | src_x);
      OUT_BATCH(src_pitch);
-      OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+      OUT_RELOC(src_buffer,
+		I915_GEM_DOMAIN_RENDER, 0,
 		src_offset);
      ADVANCE_BATCH();
   }
@ -362,14 +365,17 @@ intelEmitCopyBlit(struct intel_context *intel,
      OUT_BATCH(BR13 | ((uint16_t)dst_pitch));
      OUT_BATCH((0 << 16) | dst_x);
      OUT_BATCH((h << 16) | dst_x2);
-      OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+      OUT_RELOC(dst_buffer,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
 		dst_offset + dst_y * dst_pitch);
      OUT_BATCH((0 << 16) | src_x);
      OUT_BATCH(src_pitch);
-      OUT_RELOC(src_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+      OUT_RELOC(src_buffer,
+		I915_GEM_DOMAIN_RENDER, 0,
 		src_offset + src_y * src_pitch);
      ADVANCE_BATCH();
   }
+   intel_batchbuffer_emit_mi_flush(intel->batch);
 }


@ -513,7 +519,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
               }

 #ifndef I915
-	       if (irb_region->tiled) {
+	       if (irb_region->tiling != I915_TILING_NONE) {
 		  CMD |= XY_DST_TILED;
 		  pitch /= 4;
 	       }
@ -541,7 +547,8 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
               OUT_BATCH(BR13);
               OUT_BATCH((b.y1 << 16) | b.x1);
               OUT_BATCH((b.y2 << 16) | b.x2);
-               OUT_RELOC(write_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+               OUT_RELOC(write_buffer,
+			 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                         irb_region->draw_offset);
               OUT_BATCH(clearVal);
               ADVANCE_BATCH();
@ -549,7 +556,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
            }
         }
      }
-      intel_batchbuffer_flush(intel->batch);
+      intel_batchbuffer_emit_mi_flush(intel->batch);
   }

   UNLOCK_HARDWARE(intel);
@ -563,7 +570,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
 				  GLshort dst_pitch,
 				  dri_bo *dst_buffer,
 				  GLuint dst_offset,
-				  GLboolean dst_tiled,
+				  uint32_t dst_tiling,
 				  GLshort x, GLshort y,
 				  GLshort w, GLshort h,
 				  GLenum logic_op)
@ -587,13 +594,13 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
 				    (8 * 4) +
 				    (3 * 4) +
 				    dwords,
-				    NO_LOOP_CLIPRECTS );
+				    REFERENCES_CLIPRECTS );

   opcode = XY_SETUP_BLT_CMD;
   if (cpp == 4)
      opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
 #ifndef I915
-   if (dst_tiled) {
+   if (dst_tiling != I915_TILING_NONE) {
      opcode |= XY_DST_TILED;
      dst_pitch /= 4;
   }
@ -606,15 +613,17 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
      br13 |= BR13_8888;

   blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
-   if (dst_tiled)
+   if (dst_tiling != I915_TILING_NONE)
      blit_cmd |= XY_DST_TILED;

-   BEGIN_BATCH(8 + 3, NO_LOOP_CLIPRECTS);
+   BEGIN_BATCH(8 + 3, REFERENCES_CLIPRECTS);
   OUT_BATCH(opcode);
   OUT_BATCH(br13);
   OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
   OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
-   OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, dst_offset);
+   OUT_RELOC(dst_buffer,
+	     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+	     dst_offset);
   OUT_BATCH(0); /* bg */
   OUT_BATCH(fg_color); /* fg */
   OUT_BATCH(0); /* pattern base addr */
@ -627,5 +636,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
   intel_batchbuffer_data( intel->batch,
 			   src_bits,
 			   dwords * 4,
-			   NO_LOOP_CLIPRECTS );
+			   REFERENCES_CLIPRECTS );
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
 }
--- a/src/mesa/drivers/dri/intel/intel_blit.h
+++ b/src/mesa/drivers/dri/intel/intel_blit.h
@ -42,11 +42,11 @@ extern void intelEmitCopyBlit(struct intel_context *intel,
                              GLshort src_pitch,
                              dri_bo *src_buffer,
                              GLuint src_offset,
-			      GLboolean src_tiled,
+			      uint32_t src_tiling,
                              GLshort dst_pitch,
                              dri_bo *dst_buffer,
                              GLuint dst_offset,
-			      GLboolean dst_tiled,
+			      uint32_t dst_tiling,
                              GLshort srcx, GLshort srcy,
                              GLshort dstx, GLshort dsty,
                              GLshort w, GLshort h,
@ -57,7 +57,7 @@ extern void intelEmitFillBlit(struct intel_context *intel,
                              GLshort dst_pitch,
                              dri_bo *dst_buffer,
                              GLuint dst_offset,
-			      GLboolean dst_tiled,
+			      uint32_t dst_tiling,
                              GLshort x, GLshort y,
                              GLshort w, GLshort h, GLuint color);

@ -69,7 +69,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
 				  GLshort dst_pitch,
 				  dri_bo *dst_buffer,
 				  GLuint dst_offset,
-				  GLboolean dst_tiled,
+				  uint32_t dst_tiling,
 				  GLshort x, GLshort y,
 				  GLshort w, GLshort h,
 				  GLenum logic_op);
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@ -32,6 +32,7 @@

 #include "intel_context.h"
 #include "intel_buffer_objects.h"
+#include "intel_batchbuffer.h"
 #include "intel_regions.h"
 #include "dri_bufmgr.h"

@ -45,8 +46,7 @@ intel_bufferobj_alloc_buffer(struct intel_context *intel,
 			     struct intel_buffer_object *intel_obj)
 {
   intel_obj->buffer = dri_bo_alloc(intel->bufmgr, "bufferobj",
-				    intel_obj->Base.Size, 64,
-				    DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED | DRM_BO_FLAG_CACHED_MAPPED);
+				    intel_obj->Base.Size, 64);
 }

 /**
--- a/src/mesa/drivers/dri/intel/intel_buffers.c
+++ b/src/mesa/drivers/dri/intel/intel_buffers.c
@ -819,6 +819,8 @@ intelSwapBuffers(__DRIdrawablePrivate * dPriv)

 	 intel_fb->swap_ust = ust;
      }
+      drmCommandNone(intel->driFd, DRM_I915_GEM_THROTTLE);
+
   }
   else {
      /* XXX this shouldn't be an error but we can't handle it for now */
--- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
+++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.c
--- a/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h
+++ b/src/mesa/drivers/dri/intel/intel_bufmgr_ttm.h
@ -1,28 +0,0 @@
-
-#ifndef INTEL_BUFMGR_TTM_H
-#define INTEL_BUFMGR_TTM_H
-
-#include "dri_bufmgr.h"
-
-extern dri_bo *intel_ttm_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name,
-					       unsigned int handle);
-
-#ifdef TTM_API
-dri_fence *intel_ttm_fence_create_from_arg(dri_bufmgr *bufmgr, const char *name,
-					   drm_fence_arg_t *arg);
-#endif
-
-
-dri_bufmgr *intel_bufmgr_ttm_init(int fd, unsigned int fence_type,
-				  unsigned int fence_type_flush, int batch_size);
-
-void
-intel_ttm_enable_bo_reuse(dri_bufmgr *bufmgr);
-
-#ifndef TTM_API
-#define DRM_I915_FENCE_CLASS_ACCEL 0
-#define DRM_I915_FENCE_TYPE_RW 2
-#define DRM_I915_FENCE_FLAG_FLUSHED 0x01000000
-#endif
-
-#endif
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@ -59,7 +59,7 @@
 #include "intel_buffer_objects.h"
 #include "intel_fbo.h"
 #include "intel_decode.h"
-#include "intel_bufmgr_ttm.h"
+#include "intel_bufmgr.h"

 #include "drirenderbuffer.h"
 #include "vblank.h"
@ -96,11 +96,13 @@ int INTEL_DEBUG = (0);

 #include "extension_helper.h"

-#define DRIVER_DATE                     "20061102"
+#define DRIVER_DATE                     "20080716"
+#define DRIVER_DATE_GEM                 "GEM " DRIVER_DATE

 static const GLubyte *
 intelGetString(GLcontext * ctx, GLenum name)
 {
+   const struct intel_context *const intel = intel_context(ctx);
   const char *chipset;
   static char buffer[128];

@ -110,7 +112,7 @@ intelGetString(GLcontext * ctx, GLenum name)
      break;

   case GL_RENDERER:
-      switch (intel_context(ctx)->intelScreen->deviceID) {
+      switch (intel->intelScreen->deviceID) {
      case PCI_CHIP_845_G:
         chipset = "Intel(R) 845G";
         break;
@ -183,7 +185,9 @@ intelGetString(GLcontext * ctx, GLenum name)
         break;
      }

-      (void) driGetRendererString(buffer, chipset, DRIVER_DATE, 0);
+      (void) driGetRendererString(buffer, chipset, 
+				  (intel->ttm) ? DRIVER_DATE_GEM : DRIVER_DATE,
+				  0);
      return (GLubyte *) buffer;

   default:
@ -366,22 +370,34 @@ intelFlush(GLcontext * ctx)
   if (!IS_965(intel->intelScreen->deviceID))
      INTEL_FIREVERTICES(intel);

+   /* Emit a flush so that any frontbuffer rendering that might have occurred
+    * lands onscreen in a timely manner, even if the X Server doesn't trigger
+    * a flush for us.
+    */
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
   if (intel->batch->map != intel->batch->ptr)
      intel_batchbuffer_flush(intel->batch);
-
-   /* XXX: Need to do an MI_FLUSH here.
-    */
 }

 void
 intelFinish(GLcontext * ctx)
 {
-   struct intel_context *intel = intel_context(ctx);
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   int i;
+
    intelFlush(ctx);
-   if (intel->batch->last_fence) {
-      dri_fence_wait(intel->batch->last_fence);
-      dri_fence_unreference(intel->batch->last_fence);
-      intel->batch->last_fence = NULL;
+   /* After flushing, wait on the region behind every color draw buffer. */
+   for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
+       struct intel_renderbuffer *irb;
+
+       irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]);
+       /* intel_renderbuffer() may yield NULL; other callers test its result. */
+       if (irb && irb->region)
+	  dri_bo_wait_rendering(irb->region->buffer);
+   }
+   if (fb->_DepthBuffer) {
+      /* XXX: Wait on buffer idle */
    }
 }

@ -447,28 +463,32 @@ static GLboolean
 intel_init_bufmgr(struct intel_context *intel)
 {
   intelScreenPrivate *intelScreen = intel->intelScreen;
-   GLboolean ttm_disable = getenv("INTEL_NO_TTM") != NULL;
-   GLboolean ttm_supported;
+   GLboolean gem_disable = getenv("INTEL_NO_GEM") != NULL;
+   int gem_kernel = 0;
+   GLboolean gem_supported;
+   struct drm_i915_getparam gp;

-   /* If we've got a new enough DDX that's initializing TTM and giving us
+   gp.param = I915_PARAM_HAS_GEM;
+   gp.value = &gem_kernel;
+
+   (void) drmCommandWriteRead(intel->driFd, DRM_I915_GETPARAM, &gp, sizeof(gp));
+
+   /* If we've got a new enough DDX that's initializing GEM and giving us
    * object handles for the shared buffers, use that.
    */
   intel->ttm = GL_FALSE;
   if (intel->intelScreen->driScrnPriv->dri2.enabled)
-       ttm_supported = GL_TRUE;
+       gem_supported = GL_TRUE;
   else if (intel->intelScreen->driScrnPriv->ddx_version.minor >= 9 &&
-	    intel->intelScreen->drmMinor >= 11 &&
+	    gem_kernel &&
 	    intel->intelScreen->front.bo_handle != -1)
-       ttm_supported = GL_TRUE;
+       gem_supported = GL_TRUE;
   else
-       ttm_supported = GL_FALSE;
+       gem_supported = GL_FALSE;

-   if (!ttm_disable && ttm_supported) {
+   if (!gem_disable && gem_supported) {
      int bo_reuse_mode;
-      intel->bufmgr = intel_bufmgr_ttm_init(intel->driFd,
-					    DRM_FENCE_TYPE_EXE,
-					    DRM_FENCE_TYPE_EXE |
-					    DRM_I915_FENCE_TYPE_RW,
+      intel->bufmgr = intel_bufmgr_gem_init(intel->driFd,
 					    BATCH_SZ);
      if (intel->bufmgr != NULL)
 	 intel->ttm = GL_TRUE;
@ -478,16 +498,16 @@ intel_init_bufmgr(struct intel_context *intel)
      case DRI_CONF_BO_REUSE_DISABLED:
 	 break;
      case DRI_CONF_BO_REUSE_ALL:
-	 intel_ttm_enable_bo_reuse(intel->bufmgr);
+	 intel_bufmgr_gem_enable_reuse(intel->bufmgr);
 	 break;
      }
   }
   /* Otherwise, use the classic buffer manager. */
   if (intel->bufmgr == NULL) {
-      if (ttm_disable) {
-	 fprintf(stderr, "TTM buffer manager disabled.  Using classic.\n");
+      if (gem_disable) {
+	 fprintf(stderr, "GEM disabled.  Using classic.\n");
      } else {
-	 fprintf(stderr, "Failed to initialize TTM buffer manager.  "
+	 fprintf(stderr, "Failed to initialize GEM.  "
 		 "Falling back to classic.\n");
      }

@ -497,14 +517,17 @@ intel_init_bufmgr(struct intel_context *intel)
 	 return GL_FALSE;
      }

-      intel->bufmgr = dri_bufmgr_fake_init(intelScreen->tex.offset,
-					   intelScreen->tex.map,
-					   intelScreen->tex.size,
-					   intel_fence_emit,
-					   intel_fence_wait,
-					   intel);
+      intel->bufmgr = intel_bufmgr_fake_init(intelScreen->tex.offset,
+					     intelScreen->tex.map,
+					     intelScreen->tex.size,
+					     intel_fence_emit,
+					     intel_fence_wait,
+					     intel);
   }

+   /* XXX bufmgr should be per-screen, not per-context */
+   intelScreen->ttm = intel->ttm;
+
   return GL_TRUE;
 }

@ -672,8 +695,6 @@ intelInitContext(struct intel_context *intel,
      intel_recreate_static_regions(intel);

   intel->batch = intel_batchbuffer_alloc(intel);
-   intel->last_swap_fence = NULL;
-   intel->first_swap_fence = NULL;

   intel_bufferobj_init(intel);
   intel_fbo_init(intel);
@ -691,7 +712,6 @@ intelInitContext(struct intel_context *intel,
   /* Force all software fallbacks */
   if (driQueryOptionb(&intel->optionCache, "no_rast")) {
      fprintf(stderr, "disabling 3D rasterization\n");
-      FALLBACK(intel, INTEL_FALLBACK_USER, 1);
      intel->no_rast = 1;
   }

@ -726,17 +746,7 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv)
      intel->Fallback = 0;      /* don't call _swrast_Flush later */

      intel_batchbuffer_free(intel->batch);
-
-      if (intel->last_swap_fence) {
-	 dri_fence_wait(intel->last_swap_fence);
-	 dri_fence_unreference(intel->last_swap_fence);
-	 intel->last_swap_fence = NULL;
-      }
-      if (intel->first_swap_fence) {
-	 dri_fence_wait(intel->first_swap_fence);
-	 dri_fence_unreference(intel->first_swap_fence);
-	 intel->first_swap_fence = NULL;
-      }
+      free(intel->prim.vb);

      if (release_texture_heaps) {
         /* This share group is about to go away, free our private
@ -888,7 +898,7 @@ intelContendedLock(struct intel_context *intel, GLuint flags)
    */
   if (!intel->ttm && sarea->texAge != intel->hHWContext) {
      sarea->texAge = intel->hHWContext;
-      dri_bufmgr_fake_contended_lock_take(intel->bufmgr);
+      intel_bufmgr_fake_contended_lock_take(intel->bufmgr);
      if (INTEL_DEBUG & DEBUG_BATCH)
 	 intel_decode_context_reset();
      if (INTEL_DEBUG & DEBUG_BUFMGR)
@ -1016,6 +1026,7 @@ void UNLOCK_HARDWARE( struct intel_context *intel )
    * Nothing should be left in batch outside of LOCK/UNLOCK which references
    * cliprects.
    */
-   assert(intel->batch->cliprect_mode != REFERENCES_CLIPRECTS);
+   if (intel->batch->cliprect_mode == REFERENCES_CLIPRECTS)
+      intel_batchbuffer_flush(intel->batch);
 }

--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@ -35,6 +35,7 @@
 #include "mm.h"
 #include "texmem.h"
 #include "dri_bufmgr.h"
+#include "intel_bufmgr.h"

 #include "intel_screen.h"
 #include "intel_tex_obj.h"
@ -85,6 +86,7 @@ struct intel_context
   {
      void (*destroy) (struct intel_context * intel);
      void (*emit_state) (struct intel_context * intel);
+      void (*finish_batch) (struct intel_context * intel);
      void (*new_batch) (struct intel_context * intel);
      void (*emit_invarient_state) (struct intel_context * intel);
      void (*note_fence) (struct intel_context *intel, GLuint fence);
@ -174,9 +176,6 @@ struct intel_context
    */
   GLboolean ttm;

-   dri_fence *last_swap_fence;
-   dri_fence *first_swap_fence;
-
   struct intel_batchbuffer *batch;
   GLboolean no_batch_wrap;
   unsigned batch_id;
@ -184,9 +183,13 @@ struct intel_context
   struct
   {
      GLuint id;
-      GLuint primitive;
-      GLubyte *start_ptr;
+      uint32_t primitive;	/**< Current hardware primitive type */
      void (*flush) (struct intel_context *);
+      dri_bo *vb_bo;
+      uint8_t *vb;
+      unsigned int start_offset; /**< Byte offset of primitive sequence */
+      unsigned int current_offset; /**< Byte offset of next vertex */
+      unsigned int count;	/**< Number of vertices in current primitive */
   } prim;

   GLuint stats_wm;
@ -291,6 +294,7 @@ extern char *__progname;
 #define SUBPIXEL_X 0.125
 #define SUBPIXEL_Y 0.125

+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #define ALIGN(value, alignment)  ((value + alignment - 1) & ~(alignment - 1))

 #define INTEL_FIREVERTICES(intel)		\
--- a/src/mesa/drivers/dri/intel/intel_decode.c
+++ b/src/mesa/drivers/dri/intel/intel_decode.c
@ -183,9 +183,10 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
    switch ((data[0] & 0x1fc00000) >> 22) {
    case 0x50:
 	instr_out(data, hw_offset, 0,
-		  "XY_COLOR_BLT (rgb %sabled, alpha %sabled)\n",
+		  "XY_COLOR_BLT (rgb %sabled, alpha %sabled, dst tile %d)\n",
 		  (data[0] & (1 << 20)) ? "en" : "dis",
-		  (data[0] & (1 << 21)) ? "en" : "dis");
+		  (data[0] & (1 << 21)) ? "en" : "dis",
+		  (data[0] >> 11) & 1);

 	len = (data[0] & 0x000000ff) + 2;
 	if (len != 6)
@ -210,7 +211,8 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures)

 	instr_out(data, hw_offset, 1, "format %s, pitch %d, "
 		  "clipping %sabled\n", format,
-		  data[1] & 0xffff, data[1] & (1 << 30) ? "en" : "dis");
+		  (short)(data[1] & 0xffff),
+		  data[1] & (1 << 30) ? "en" : "dis");
 	instr_out(data, hw_offset, 2, "(%d,%d)\n",
 		  data[2] & 0xffff, data[2] >> 16);
 	instr_out(data, hw_offset, 3, "(%d,%d)\n",
@ -220,9 +222,12 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
 	return len;
    case 0x53:
 	instr_out(data, hw_offset, 0,
-		  "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled)\n",
+		  "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled, "
+		  "src tile %d, dst tile %d)\n",
 		  (data[0] & (1 << 20)) ? "en" : "dis",
-		  (data[0] & (1 << 21)) ? "en" : "dis");
+		  (data[0] & (1 << 21)) ? "en" : "dis",
+		  (data[0] >> 15) & 1,
+		  (data[0] >> 11) & 1);

 	len = (data[0] & 0x000000ff) + 2;
 	if (len != 8)
@ -247,16 +252,17 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures)

 	instr_out(data, hw_offset, 1, "format %s, dst pitch %d, "
 		  "clipping %sabled\n", format,
-		  data[1] & 0xffff, data[1] & (1 << 30) ? "en" : "dis");
+		  (short)(data[1] & 0xffff),
+		  data[1] & (1 << 30) ? "en" : "dis");
 	instr_out(data, hw_offset, 2, "dst (%d,%d)\n",
 		  data[2] & 0xffff, data[2] >> 16);
 	instr_out(data, hw_offset, 3, "dst (%d,%d)\n",
-		  data[2] & 0xffff, data[2] >> 16);
+		  data[3] & 0xffff, data[3] >> 16);
 	instr_out(data, hw_offset, 4, "dst offset 0x%08x\n", data[4]);
 	instr_out(data, hw_offset, 5, "src (%d,%d)\n",
 		  data[5] & 0xffff, data[5] >> 16);
 	instr_out(data, hw_offset, 6, "src pitch %d\n",
-		  data[6] & 0xffff);
+		  (short)(data[6] & 0xffff));
 	instr_out(data, hw_offset, 7, "src offset 0x%08x\n", data[7]);
 	return len;
    }
--- a/src/mesa/drivers/dri/intel/intel_depthstencil.c
+++ b/src/mesa/drivers/dri/intel/intel_depthstencil.c
@ -39,7 +39,7 @@
 #include "intel_fbo.h"
 #include "intel_depthstencil.h"
 #include "intel_regions.h"
-
+#include "intel_span.h"

 /**
 * The GL_EXT_framebuffer_object allows the user to create their own
@ -86,68 +86,33 @@
 *
 */

-
-
-static void
-map_regions(GLcontext * ctx,
-            struct intel_renderbuffer *depthRb,
-            struct intel_renderbuffer *stencilRb)
-{
-   struct intel_context *intel = intel_context(ctx);
-   if (depthRb && depthRb->region) {
-      intel_region_map(intel, depthRb->region);
-      depthRb->pfMap = depthRb->region->map;
-      depthRb->pfPitch = depthRb->region->pitch;
-   }
-   if (stencilRb && stencilRb->region) {
-      intel_region_map(intel, stencilRb->region);
-      stencilRb->pfMap = stencilRb->region->map;
-      stencilRb->pfPitch = stencilRb->region->pitch;
-   }
-}
-
-static void
-unmap_regions(GLcontext * ctx,
-              struct intel_renderbuffer *depthRb,
-              struct intel_renderbuffer *stencilRb)
-{
-   struct intel_context *intel = intel_context(ctx);
-   if (depthRb && depthRb->region) {
-      intel_region_unmap(intel, depthRb->region);
-      depthRb->pfMap = NULL;
-      depthRb->pfPitch = 0;
-   }
-   if (stencilRb && stencilRb->region) {
-      intel_region_unmap(intel, stencilRb->region);
-      stencilRb->pfMap = NULL;
-      stencilRb->pfPitch = 0;
-   }
-}
-
-
-
 /**
 * Undo the pairing/interleaving between depth and stencil buffers.
 * irb should be a depth/stencil or stencil renderbuffer.
 */
 void
-intel_unpair_depth_stencil(GLcontext * ctx, struct intel_renderbuffer *irb)
+intel_unpair_depth_stencil(GLcontext *ctx, struct intel_renderbuffer *irb)
 {
+   struct intel_context *intel = intel_context(ctx);
+   struct gl_renderbuffer *rb = &irb->Base;
+
   if (irb->PairedStencil) {
      /* irb is a depth/stencil buffer */
      struct gl_renderbuffer *stencilRb;
      struct intel_renderbuffer *stencilIrb;

-      ASSERT(irb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);
+      ASSERT(rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT);

      stencilRb = _mesa_lookup_renderbuffer(ctx, irb->PairedStencil);
      stencilIrb = intel_renderbuffer(stencilRb);
      if (stencilIrb) {
         /* need to extract stencil values from the depth buffer */
-         ASSERT(stencilIrb->PairedDepth == irb->Base.Name);
-         map_regions(ctx, irb, stencilIrb);
-         _mesa_extract_stencil(ctx, &irb->Base, &stencilIrb->Base);
-         unmap_regions(ctx, irb, stencilIrb);
+	 ASSERT(stencilIrb->PairedDepth == rb->Name);
+	 intel_renderbuffer_map(intel, rb);
+	 intel_renderbuffer_map(intel, stencilRb);
+	 _mesa_extract_stencil(ctx, rb, stencilRb);
+	 intel_renderbuffer_unmap(intel, stencilRb);
+	 intel_renderbuffer_unmap(intel, rb);
         stencilIrb->PairedDepth = 0;
      }
      irb->PairedStencil = 0;
@ -157,17 +122,19 @@ intel_unpair_depth_stencil(GLcontext * ctx, struct intel_renderbuffer *irb)
      struct gl_renderbuffer *depthRb;
      struct intel_renderbuffer *depthIrb;

-      ASSERT(irb->Base._ActualFormat == GL_STENCIL_INDEX8_EXT ||
-             irb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);
+      ASSERT(rb->_ActualFormat == GL_STENCIL_INDEX8_EXT ||
+             rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT);

      depthRb = _mesa_lookup_renderbuffer(ctx, irb->PairedDepth);
      depthIrb = intel_renderbuffer(depthRb);
      if (depthIrb) {
         /* need to extract stencil values from the depth buffer */
-         ASSERT(depthIrb->PairedStencil == irb->Base.Name);
-         map_regions(ctx, depthIrb, irb);
-         _mesa_extract_stencil(ctx, &depthIrb->Base, &irb->Base);
-         unmap_regions(ctx, depthIrb, irb);
+	 ASSERT(depthIrb->PairedStencil == rb->Name);
+	 intel_renderbuffer_map(intel, rb);
+	 intel_renderbuffer_map(intel, depthRb);
+	 _mesa_extract_stencil(ctx, depthRb, rb);
+	 intel_renderbuffer_unmap(intel, depthRb);
+	 intel_renderbuffer_unmap(intel, rb);
         depthIrb->PairedStencil = 0;
      }
      irb->PairedDepth = 0;
@ -194,6 +161,7 @@ void
 intel_validate_paired_depth_stencil(GLcontext * ctx,
                                    struct gl_framebuffer *fb)
 {
+   struct intel_context *intel = intel_context(ctx);
   struct intel_renderbuffer *depthRb, *stencilRb;

   depthRb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
@ -230,9 +198,11 @@ intel_validate_paired_depth_stencil(GLcontext * ctx,
                   stencilRb->Base._ActualFormat == GL_DEPTH24_STENCIL8_EXT);

            /* establish new pairing: interleave stencil into depth buffer */
-            map_regions(ctx, depthRb, stencilRb);
+	    intel_renderbuffer_map(intel, &depthRb->Base);
+	    intel_renderbuffer_map(intel, &stencilRb->Base);
            _mesa_insert_stencil(ctx, &depthRb->Base, &stencilRb->Base);
-            unmap_regions(ctx, depthRb, stencilRb);
+	    intel_renderbuffer_unmap(intel, &stencilRb->Base);
+	    intel_renderbuffer_unmap(intel, &depthRb->Base);
            depthRb->PairedStencil = stencilRb->Base.Name;
            stencilRb->PairedDepth = depthRb->Base.Name;
         }
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@ -153,6 +153,9 @@ intel_delete_renderbuffer(struct gl_renderbuffer *rb)
      intel_unpair_depth_stencil(ctx, irb);
   }

+   if (irb->span_cache != NULL)
+      _mesa_free(irb->span_cache);
+
   if (intel && irb->region) {
      intel_region_release(&irb->region);
   }
@ -209,6 +212,14 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
   case GL_RGB10:
   case GL_RGB12:
   case GL_RGB16:
+      rb->_ActualFormat = GL_RGB8;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      rb->RedBits = 8;
+      rb->GreenBits = 8;
+      rb->BlueBits = 8;
+      rb->AlphaBits = 0;
+      cpp = 4;
+      break;
   case GL_RGBA:
   case GL_RGBA2:
   case GL_RGBA4:
@ -294,9 +305,6 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
      rb->Width = width;
      rb->Height = height;

-      /* This sets the Get/PutRow/Value functions */
-      intel_set_span_functions(&irb->Base);
-
      return GL_TRUE;
   }
 }
@ -366,7 +374,6 @@ intel_renderbuffer_set_region(struct intel_renderbuffer *rb,
   intel_region_reference(&rb->region, region);
   intel_region_release(&old);

-   rb->pfMap = region->map;
   rb->pfPitch = region->pitch;
 }

@ -446,8 +453,6 @@ intel_create_renderbuffer(GLenum intFormat)
   irb->Base.Delete = intel_delete_renderbuffer;
   irb->Base.AllocStorage = intel_alloc_window_storage;
   irb->Base.GetPointer = intel_get_pointer;
-   /* This sets the Get/PutRow/Value functions */
-   intel_set_span_functions(&irb->Base);

   return irb;
 }
@ -519,7 +524,7 @@ intel_framebuffer_renderbuffer(GLcontext * ctx,

 static GLboolean
 intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb, 
-                          struct gl_texture_image *texImage)
+		     struct gl_texture_image *texImage)
 {
   if (texImage->TexFormat == &_mesa_texformat_argb8888) {
      irb->Base._ActualFormat = GL_RGBA8;
@ -558,7 +563,6 @@ intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb,

   irb->Base.Delete = intel_delete_renderbuffer;
   irb->Base.AllocStorage = intel_nop_alloc_storage;
-   intel_set_span_functions(&irb->Base);

   irb->RenderToTexture = GL_TRUE;

--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@ -28,9 +28,9 @@
 #ifndef INTEL_FBO_H
 #define INTEL_FBO_H

+#include "intel_screen.h"

 struct intel_context;
-struct intel_region;

 /**
 * Intel framebuffer, derived from gl_framebuffer.
@ -70,7 +70,6 @@ struct intel_renderbuffer
 {
   struct gl_renderbuffer Base;
   struct intel_region *region;
-   void *pfMap;                 /* possibly paged flipped map pointer */
   GLuint pfPitch;              /* possibly paged flipped pitch */
   GLboolean RenderToTexture;   /* RTT? */

@ -80,6 +79,9 @@ struct intel_renderbuffer
   GLuint pf_pending;  /**< sequence number of pending flip */

   GLuint vbl_pending;   /**< vblank sequence number of pending flip */
+
+   uint8_t *span_cache;
+   unsigned long span_cache_offset;
 };

 extern struct intel_renderbuffer *intel_renderbuffer(struct gl_renderbuffer
--- a/src/mesa/drivers/dri/intel/intel_ioctl.c
+++ b/src/mesa/drivers/dri/intel/intel_ioctl.c
@ -30,6 +30,8 @@
 #include <unistd.h>
 #include <errno.h>
 #include <sched.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>

 #include "mtypes.h"
 #include "context.h"
@ -43,7 +45,7 @@
 #include "drm.h"
 #include "i915_drm.h"

-#include "intel_bufmgr_ttm.h"
+#include "intel_bufmgr.h"

 #define FILE_DEBUG_FLAG DEBUG_IOCTL

@ -104,7 +106,7 @@ intelWaitIrq(struct intel_context *intel, int seq)
 }


-void
+int
 intel_batch_ioctl(struct intel_context *intel,
                  GLuint start_offset,
                  GLuint used,
@ -113,7 +115,7 @@ intel_batch_ioctl(struct intel_context *intel,
   struct drm_i915_batchbuffer batch;

   if (intel->no_hw)
-      return;
+      return 0;

   assert(intel->locked);
   assert(used);
@ -142,82 +144,42 @@ intel_batch_ioctl(struct intel_context *intel,
   if (drmCommandWrite(intel->driFd, DRM_I915_BATCHBUFFER, &batch,
                       sizeof(batch))) {
      fprintf(stderr, "DRM_I915_BATCHBUFFER: %d\n", -errno);
-      UNLOCK_HARDWARE(intel);
-      exit(1);
+      return -errno;
   }
+
+   return 0;
 }

-#ifdef TTM_API
-void
+int
 intel_exec_ioctl(struct intel_context *intel,
 		 GLuint used,
 		 GLboolean ignore_cliprects, GLboolean allow_unlock,
-		 void *start, GLuint count, dri_fence **fence)
+		 struct drm_i915_gem_execbuffer *execbuf)
 {
-   struct drm_i915_execbuffer execbuf;
-   dri_fence *fo;
    int ret;

    assert(intel->locked);
    assert(used);

    if (intel->no_hw)
-      return;
+      return 0;

-   if (*fence) {
-     dri_fence_unreference(*fence);
-   }
-
-   memset(&execbuf, 0, sizeof(execbuf));
-
-   execbuf.num_buffers = count;
-   execbuf.batch.used = used;
-   execbuf.batch.cliprects = intel->pClipRects;
-   execbuf.batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects;
-   execbuf.batch.DR1 = 0;
-   execbuf.batch.DR4 = ((((GLuint) intel->drawX) & 0xffff) |
-			(((GLuint) intel->drawY) << 16));
-
-   execbuf.ops_list = (unsigned long)start; // TODO
-   execbuf.fence_arg.flags = DRM_FENCE_FLAG_SHAREABLE | DRM_I915_FENCE_FLAG_FLUSHED;
+   execbuf->batch_start_offset = 0;
+   execbuf->batch_len = used;
+   execbuf->cliprects_ptr = (uintptr_t)intel->pClipRects;
+   execbuf->num_cliprects = ignore_cliprects ? 0 : intel->numClipRects;
+   execbuf->DR1 = 0;
+   execbuf->DR4 = ((((GLuint) intel->drawX) & 0xffff) |
+		   (((GLuint) intel->drawY) << 16));

    do {
-      ret = drmCommandWriteRead(intel->driFd, DRM_I915_EXECBUFFER, &execbuf,
-				sizeof(execbuf));
+      ret = ioctl(intel->driFd, DRM_IOCTL_I915_GEM_EXECBUFFER, execbuf) ? -errno : 0;
    } while (ret == -EAGAIN);

    if (ret != 0) {
-      fprintf(stderr, "DRM_I915_EXECBUFFER: %d\n", -errno);
-      UNLOCK_HARDWARE(intel);
-      exit(1);
+      fprintf(stderr, "DRM_I915_GEM_EXECBUFFER: %d\n", ret);
+      return ret;	/* ret already holds -errno; fprintf may clobber errno */
    }

-   if (execbuf.fence_arg.error != 0) {
-
-      /*
-       * Fence creation has failed, but the GPU has been
-       * idled by the kernel. Safe to continue.
-       */ 
-
-      *fence = NULL;
-      return;
-   }
-
-   fo = intel_ttm_fence_create_from_arg(intel->bufmgr, "fence buffers",
-					&execbuf.fence_arg);
-   if (!fo) {
-      fprintf(stderr, "failed to fence handle: %08x\n", execbuf.fence_arg.handle);
-      UNLOCK_HARDWARE(intel);
-      exit(1);
-   }
-   *fence = fo;
+   return 0;
 }
-#else
-void
-intel_exec_ioctl(struct intel_context *intel,
-		 GLuint used,
-		 GLboolean ignore_cliprects, GLboolean allow_unlock,
-		 void *start, GLuint count, dri_fence **fence)
-{
-}
-#endif
--- a/src/mesa/drivers/dri/intel/intel_ioctl.h
+++ b/src/mesa/drivers/dri/intel/intel_ioctl.h
@ -33,14 +33,14 @@
 void intelWaitIrq( struct intel_context *intel, int seq );
 int intelEmitIrqLocked( struct intel_context *intel );

-void intel_batch_ioctl( struct intel_context *intel, 
-			GLuint start_offset,
-			GLuint used,
-			GLboolean ignore_cliprects,
-			GLboolean allow_unlock );
-void intel_exec_ioctl(struct intel_context *intel,
+int intel_batch_ioctl(struct intel_context *intel,
+		      GLuint start_offset,
 		      GLuint used,
-		      GLboolean ignore_cliprects, GLboolean allow_unlock,
-		      void *start, GLuint count, dri_fence **fence);
+		      GLboolean ignore_cliprects,
+		      GLboolean allow_unlock);
+int intel_exec_ioctl(struct intel_context *intel,
+		     GLuint used,
+		     GLboolean ignore_cliprects, GLboolean allow_unlock,
+		     struct drm_i915_gem_execbuffer *execbuf);

 #endif
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@ -43,7 +43,7 @@
 #include "intel_buffer_objects.h"
 #include "intel_buffers.h"
 #include "intel_pixel.h"
-
+#include "intel_reg.h"


 #define FILE_DEBUG_FLAG DEBUG_PIXEL
@ -293,7 +293,7 @@ do_blit_bitmap( GLcontext *ctx,
 						  dst->pitch,
 						  dst->buffer,
 						  0,
-						  dst->tiled,
+						  dst->tiling,
 						  rect.x1 + px,
 						  rect.y2 - (py + h),
 						  w, h,
@ -301,9 +301,8 @@ do_blit_bitmap( GLcontext *ctx,
 	    } 
 	 } 
      }
-   out:
-      intel_batchbuffer_flush(intel->batch);
   }
+out:
   UNLOCK_HARDWARE(intel);


--- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
@ -229,7 +229,7 @@ do_texture_copypixels(GLcontext * ctx,

    out:
      intel->vtbl.leave_meta_state(intel);
-      intel_batchbuffer_flush(intel->batch);
+      intel_batchbuffer_emit_mi_flush(intel->batch);
   }
   UNLOCK_HARDWARE(intel);

@ -337,18 +337,16 @@ do_blit_copypixels(GLcontext * ctx,
            continue;

         intelEmitCopyBlit(intel, dst->cpp,
-			   src->pitch, src->buffer, 0, src->tiled,
-			   dst->pitch, dst->buffer, 0, dst->tiled,
+			   src->pitch, src->buffer, 0, src->tiling,
+			   dst->pitch, dst->buffer, 0, dst->tiling,
 			   clip_x + delta_x, clip_y + delta_y, /* srcx, srcy */
 			   clip_x, clip_y, /* dstx, dsty */
 			   clip_w, clip_h,
 			   ctx->Color.ColorLogicOpEnabled ?
 			   ctx->Color.LogicOp : GL_COPY);
      }
-
-    out:
-      intel_batchbuffer_flush(intel->batch);
   }
+out:
   UNLOCK_HARDWARE(intel);

   DBG("%s: success\n", __FUNCTION__);
--- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
@ -81,7 +81,8 @@ do_texture_drawpixels(GLcontext * ctx,
   else {
      /* PBO only for now:
       */
-/*       _mesa_printf("%s - not PBO\n", __FUNCTION__); */
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+	 _mesa_printf("%s - not PBO\n", __FUNCTION__);
      return GL_FALSE;
   }

@ -180,7 +181,7 @@ do_texture_drawpixels(GLcontext * ctx,
 				 srcx, srcx + width, srcy + height, srcy);
    out:
      intel->vtbl.leave_meta_state(intel);
-      intel_batchbuffer_flush(intel->batch);
+      intel_batchbuffer_emit_mi_flush(intel->batch);
   }
   UNLOCK_HARDWARE(intel);
   return GL_TRUE;
@ -218,7 +219,6 @@ do_blit_drawpixels(GLcontext * ctx,
   struct intel_buffer_object *src = intel_buffer_object(unpack->BufferObj);
   GLuint src_offset;
   GLuint rowLength;
-   dri_fence *fence = NULL;

   if (INTEL_DEBUG & DEBUG_PIXEL)
      _mesa_printf("%s\n", __FUNCTION__);
@ -314,7 +314,7 @@ do_blit_drawpixels(GLcontext * ctx,
         intelEmitCopyBlit(intel,
                           dest->cpp,
                           rowLength, src_buffer, src_offset, GL_FALSE,
-                           dest->pitch, dest->buffer, 0, dest->tiled,
+                           dest->pitch, dest->buffer, 0, dest->tiling,
                           rect.x1 - dest_rect.x1,
                           rect.y2 - dest_rect.y2,
                           rect.x1,
@ -322,17 +322,9 @@ do_blit_drawpixels(GLcontext * ctx,
 			   ctx->Color.ColorLogicOpEnabled ?
 			   ctx->Color.LogicOp : GL_COPY);
      }
-      intel_batchbuffer_flush(intel->batch);
-      fence = intel->batch->last_fence;
-      dri_fence_reference(fence);
   }
   UNLOCK_HARDWARE(intel);

-   if (fence) {
-      dri_fence_wait(fence);
-      dri_fence_unreference(fence);
-   }
-
   if (INTEL_DEBUG & DEBUG_PIXEL)
      _mesa_printf("%s - DONE\n", __FUNCTION__);

--- a/src/mesa/drivers/dri/intel/intel_reg.h
+++ b/src/mesa/drivers/dri/intel/intel_reg.h
@ -31,11 +31,140 @@

 #define MI_BATCH_BUFFER_END		(CMD_MI | 0xA << 23)

+#define MI_FLUSH			(CMD_MI | (4 << 23))
+#define FLUSH_MAP_CACHE				(1 << 0)
+#define INHIBIT_FLUSH_RENDER_CACHE		(1 << 2)
+
 /* Stalls command execution waiting for the given events to have occurred. */
 #define MI_WAIT_FOR_EVENT               (CMD_MI | (0x3 << 23))
 #define MI_WAIT_FOR_PLANE_B_FLIP        (1<<6)
 #define MI_WAIT_FOR_PLANE_A_FLIP        (1<<2)

+/* p189 */
+#define _3DSTATE_LOAD_STATE_IMMEDIATE_1   (CMD_3D | (0x1d<<24) | (0x04<<16))
+#define I1_LOAD_S(n)                      (1<<(4+(n))) /* n parenthesized so expression arguments expand correctly */
+
+/** @{
+ * 915 definitions
+ */
+#define S0_VB_OFFSET_MASK		0xffffffc
+#define S0_AUTO_CACHE_INV_DISABLE	(1<<0)
+/** @} */
+
+/** @{
+ * 830 definitions
+ */
+#define S0_VB_OFFSET_MASK_830		0xffffff8
+#define S0_VB_PITCH_SHIFT_830		1
+#define S0_VB_ENABLE_830		0
+/** @} */
+
+#define S1_VERTEX_WIDTH_SHIFT          24
+#define S1_VERTEX_WIDTH_MASK           (0x3f<<24)
+#define S1_VERTEX_PITCH_SHIFT          16
+#define S1_VERTEX_PITCH_MASK           (0x3f<<16)
+
+#define TEXCOORDFMT_2D                 0x0
+#define TEXCOORDFMT_3D                 0x1
+#define TEXCOORDFMT_4D                 0x2
+#define TEXCOORDFMT_1D                 0x3
+#define TEXCOORDFMT_2D_16              0x4
+#define TEXCOORDFMT_4D_16              0x5
+#define TEXCOORDFMT_NOT_PRESENT        0xf
+#define S2_TEXCOORD_FMT0_MASK            0xf
+#define S2_TEXCOORD_FMT1_SHIFT           4
+#define S2_TEXCOORD_FMT(unit, type)    ((type)<<((unit)*4)) /* 4 bits per unit; unit parenthesized for expression arguments */
+#define S2_TEXCOORD_NONE               (~0)
+#define S2_TEX_COUNT_SHIFT_830		12
+#define S2_VERTEX_0_WIDTH_SHIFT_830	0
+#define S2_VERTEX_1_WIDTH_SHIFT_830	6
+/* S3 not interesting */
+
+#define S4_POINT_WIDTH_SHIFT           23
+#define S4_POINT_WIDTH_MASK            (0x1ffu<<23) /* unsigned: 0x1ff<<23 does not fit in a signed int */
+#define S4_LINE_WIDTH_SHIFT            19
+#define S4_LINE_WIDTH_ONE              (0x2<<19)
+#define S4_LINE_WIDTH_MASK             (0xf<<19)
+#define S4_FLATSHADE_ALPHA             (1<<18)
+#define S4_FLATSHADE_FOG               (1<<17)
+#define S4_FLATSHADE_SPECULAR          (1<<16)
+#define S4_FLATSHADE_COLOR             (1<<15)
+#define S4_CULLMODE_BOTH	       (0<<13)
+#define S4_CULLMODE_NONE	       (1<<13)
+#define S4_CULLMODE_CW		       (2<<13)
+#define S4_CULLMODE_CCW		       (3<<13)
+#define S4_CULLMODE_MASK	       (3<<13)
+#define S4_VFMT_POINT_WIDTH            (1<<12)
+#define S4_VFMT_SPEC_FOG               (1<<11)
+#define S4_VFMT_COLOR                  (1<<10)
+#define S4_VFMT_DEPTH_OFFSET           (1<<9)
+#define S4_VFMT_XYZ     	       (1<<6)
+#define S4_VFMT_XYZW     	       (2<<6)
+#define S4_VFMT_XY     		       (3<<6)
+#define S4_VFMT_XYW     	       (4<<6)
+#define S4_VFMT_XYZW_MASK              (7<<6)
+#define S4_FORCE_DEFAULT_DIFFUSE       (1<<5)
+#define S4_FORCE_DEFAULT_SPECULAR      (1<<4)
+#define S4_LOCAL_DEPTH_OFFSET_ENABLE   (1<<3)
+#define S4_VFMT_FOG_PARAM              (1<<2)
+#define S4_SPRITE_POINT_ENABLE         (1<<1)
+#define S4_LINE_ANTIALIAS_ENABLE       (1<<0)
+
+#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH   | 	\
+		      S4_VFMT_SPEC_FOG      |	\
+		      S4_VFMT_COLOR         |	\
+		      S4_VFMT_DEPTH_OFFSET  |	\
+		      S4_VFMT_XYZW_MASK     |	\
+		      S4_VFMT_FOG_PARAM)
+
+
+#define S5_WRITEDISABLE_ALPHA          (1u<<31) /* unsigned: 1<<31 overflows a signed int */
+#define S5_WRITEDISABLE_RED            (1<<30)
+#define S5_WRITEDISABLE_GREEN          (1<<29)
+#define S5_WRITEDISABLE_BLUE           (1<<28)
+#define S5_WRITEDISABLE_MASK           (0xfu<<28) /* unsigned: covers bits 28..31 */
+#define S5_FORCE_DEFAULT_POINT_SIZE    (1<<27)
+#define S5_LAST_PIXEL_ENABLE           (1<<26)
+#define S5_GLOBAL_DEPTH_OFFSET_ENABLE  (1<<25)
+#define S5_FOG_ENABLE                  (1<<24)
+#define S5_STENCIL_REF_SHIFT           16
+#define S5_STENCIL_REF_MASK            (0xff<<16)
+#define S5_STENCIL_TEST_FUNC_SHIFT     13
+#define S5_STENCIL_TEST_FUNC_MASK      (0x7<<13)
+#define S5_STENCIL_FAIL_SHIFT          10
+#define S5_STENCIL_FAIL_MASK           (0x7<<10)
+#define S5_STENCIL_PASS_Z_FAIL_SHIFT   7
+#define S5_STENCIL_PASS_Z_FAIL_MASK    (0x7<<7)
+#define S5_STENCIL_PASS_Z_PASS_SHIFT   4
+#define S5_STENCIL_PASS_Z_PASS_MASK    (0x7<<4)
+#define S5_STENCIL_WRITE_ENABLE        (1<<3)
+#define S5_STENCIL_TEST_ENABLE         (1<<2)
+#define S5_COLOR_DITHER_ENABLE         (1<<1)
+#define S5_LOGICOP_ENABLE              (1<<0)
+
+
+#define S6_ALPHA_TEST_ENABLE           (1u<<31) /* unsigned: 1<<31 overflows a signed int */
+#define S6_ALPHA_TEST_FUNC_SHIFT       28
+#define S6_ALPHA_TEST_FUNC_MASK        (0x7<<28)
+#define S6_ALPHA_REF_SHIFT             20
+#define S6_ALPHA_REF_MASK              (0xff<<20)
+#define S6_DEPTH_TEST_ENABLE           (1<<19)
+#define S6_DEPTH_TEST_FUNC_SHIFT       16
+#define S6_DEPTH_TEST_FUNC_MASK        (0x7<<16)
+#define S6_CBUF_BLEND_ENABLE           (1<<15)
+#define S6_CBUF_BLEND_FUNC_SHIFT       12
+#define S6_CBUF_BLEND_FUNC_MASK        (0x7<<12)
+#define S6_CBUF_SRC_BLEND_FACT_SHIFT   8
+#define S6_CBUF_SRC_BLEND_FACT_MASK    (0xf<<8)
+#define S6_CBUF_DST_BLEND_FACT_SHIFT   4
+#define S6_CBUF_DST_BLEND_FACT_MASK    (0xf<<4)
+#define S6_DEPTH_WRITE_ENABLE          (1<<3)
+#define S6_COLOR_WRITE_ENABLE          (1<<2)
+#define S6_TRISTRIP_PV_SHIFT           0
+#define S6_TRISTRIP_PV_MASK            (0x3<<0)
+
+#define S7_DEPTH_OFFSET_CONST_MASK     ~0
+
 /* Primitive dispatch on 830-945 */
 #define _3DPRIMITIVE			(CMD_3D | (0x1f << 24))
 #define PRIM_INDIRECT            (1<<23)
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@ -39,13 +39,17 @@
 * last moment.
 */

+#include <sys/ioctl.h>
+#include <errno.h>
+
 #include "intel_context.h"
 #include "intel_regions.h"
 #include "intel_blit.h"
 #include "intel_buffer_objects.h"
 #include "dri_bufmgr.h"
-#include "intel_bufmgr_ttm.h"
+#include "intel_bufmgr.h"
 #include "intel_batchbuffer.h"
+#include "intel_chipset.h"

 #define FILE_DEBUG_FLAG DEBUG_REGION

@ -76,10 +80,34 @@ intel_region_unmap(struct intel_context *intel, struct intel_region *region)
   }
 }

+static int
+intel_set_region_tiling_gem(struct intel_context *intel,
+			    struct intel_region *region,
+			    uint32_t bo_handle)
+{
+   struct drm_i915_gem_get_tiling get_tiling;
+   int ret;
+   /* Ask the kernel (GET_TILING ioctl on intel->driFd) for bo_handle's modes. */
+   memset(&get_tiling, 0, sizeof(get_tiling));
+
+   get_tiling.handle = bo_handle;
+   ret = ioctl(intel->driFd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
+   if (ret != 0) {
+      fprintf(stderr, "Failed to get tiling state for region: %s\n",
+	      strerror(errno));
+      return ret; /* ioctl's nonzero result; region->tiling/bit_6_swizzle untouched */
+   }
+   /* Success: copy the reported tiling and swizzle modes onto the region. */
+   region->tiling = get_tiling.tiling_mode;
+   region->bit_6_swizzle = get_tiling.swizzle_mode;
+
+   return 0;
+}
+
 static struct intel_region *
 intel_region_alloc_internal(struct intel_context *intel,
 			    GLuint cpp, GLuint pitch, GLuint height,
-			    GLuint tiled, dri_bo *buffer)
+			    dri_bo *buffer)
 {
   struct intel_region *region;

@ -93,9 +121,12 @@ intel_region_alloc_internal(struct intel_context *intel,
   region->pitch = pitch;
   region->height = height;     /* needed? */
   region->refcount = 1;
-   region->tiled = tiled;
   region->buffer = buffer;

+   /* Default to no tiling */
+   region->tiling = I915_TILING_NONE;
+   region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE;
+
   return region;
 }

@ -106,25 +137,28 @@ intel_region_alloc(struct intel_context *intel,
   dri_bo *buffer;

   buffer = dri_bo_alloc(intel->bufmgr, "region",
-			 pitch * cpp * height, 64,
-			 DRM_BO_FLAG_MEM_LOCAL |
-			 DRM_BO_FLAG_CACHED |
-			 DRM_BO_FLAG_CACHED_MAPPED);
+			 pitch * cpp * height, 64);

-   return intel_region_alloc_internal(intel, cpp, pitch, height, 0, buffer);
+   return intel_region_alloc_internal(intel, cpp, pitch, height, buffer);
 }

 struct intel_region *
 intel_region_alloc_for_handle(struct intel_context *intel,
 			      GLuint cpp, GLuint pitch, GLuint height,
-			      GLuint tiled, GLuint handle)
+			      GLuint handle)
 {
+   struct intel_region *region;
    dri_bo *buffer;

-   buffer = intel_ttm_bo_create_from_handle(intel->bufmgr, "region", handle);
+   buffer = intel_bo_gem_create_from_name(intel->bufmgr, "dri2 region", handle);

-   return intel_region_alloc_internal(intel,
-				      cpp, pitch, height, tiled, buffer);
+   region = intel_region_alloc_internal(intel, cpp, pitch, height, buffer);
+   if (region == NULL)
+      return region;
+   /* NOTE(review): the tiling query's return value is ignored; on failure the region keeps the tiling fields set by intel_region_alloc_internal(). */
+   intel_set_region_tiling_gem(intel, region, handle);
+   /* NOTE(review): buffer is never checked for NULL, and handle is used both as a name (create_from_name) and as the GET_TILING handle - confirm. */
+   return region;
 }

 void
@ -138,26 +172,34 @@ intel_region_reference(struct intel_region **dst, struct intel_region *src)
 }

 void
-intel_region_release(struct intel_region **region)
+intel_region_release(struct intel_region **region_handle)
 {
-   if (!*region)
+   struct intel_region *region = *region_handle;
+   /* Drop one reference through region_handle; a NULL region is a no-op. */
+   if (region == NULL)
       return;

-   DBG("%s %d\n", __FUNCTION__, (*region)->refcount - 1);
+   DBG("%s %d\n", __FUNCTION__, region->refcount - 1);

-   ASSERT((*region)->refcount > 0);
-   (*region)->refcount--;
+   ASSERT(region->refcount > 0);
+   region->refcount--;

-   if ((*region)->refcount == 0) {
-      assert((*region)->map_refcount == 0);
+   if (region->refcount == 0) {
+      assert(region->map_refcount == 0);

-      if ((*region)->pbo)
-	 (*region)->pbo->region = NULL;
-      (*region)->pbo = NULL;
-      dri_bo_unreference((*region)->buffer);
-      free(*region);
+      if (region->pbo)
+	 region->pbo->region = NULL;
+      region->pbo = NULL;
+      dri_bo_unreference(region->buffer);
+      /* Undo the drmMap recorded in classic_map, using pitch * cpp * height as its length. */
+      if (region->classic_map != NULL) {
+	 drmUnmap(region->classic_map,
+			region->pitch * region->cpp * region->height);
+      }
+      /* Last reference gone: free the region structure itself. */
+      free(region);
    }
-   *region = NULL;
+   *region_handle = NULL; /* caller's pointer is cleared even when other references remain */
 }

 /*
@ -272,8 +314,8 @@ intel_region_copy(struct intel_context *intel,

   intelEmitCopyBlit(intel,
                     dst->cpp,
-                     src->pitch, src->buffer, src_offset, src->tiled,
-                     dst->pitch, dst->buffer, dst_offset, dst->tiled,
+                     src->pitch, src->buffer, src_offset, src->tiling,
+                     dst->pitch, dst->buffer, dst_offset, dst->tiling,
                     srcx, srcy, dstx, dsty, width, height,
 		     GL_COPY);
 }
@ -303,7 +345,7 @@ intel_region_fill(struct intel_context *intel,

   intelEmitFillBlit(intel,
                     dst->cpp,
-                     dst->pitch, dst->buffer, dst_offset, dst->tiled,
+                     dst->pitch, dst->buffer, dst_offset, dst->tiling,
                     dstx, dsty, width, height, color);
 }

@ -355,10 +397,7 @@ intel_region_release_pbo(struct intel_context *intel,

   region->buffer = dri_bo_alloc(intel->bufmgr, "region",
 				 region->pitch * region->cpp * region->height,
-				 64,
-				 DRM_BO_FLAG_MEM_LOCAL |
-				 DRM_BO_FLAG_CACHED |
-				 DRM_BO_FLAG_CACHED_MAPPED);
+				 64);
 }

 /* Break the COW tie to the pbo.  Both the pbo and the region end up
@ -382,23 +421,19 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region)
   /* Now blit from the texture buffer to the new buffer: 
    */

-   intel_batchbuffer_flush(intel->batch);
-
   was_locked = intel->locked;
-   if (intel->locked)
+   if (!was_locked)
      LOCK_HARDWARE(intel);

   intelEmitCopyBlit(intel,
 		     region->cpp,
-		     region->pitch, region->buffer, 0, region->tiled,
-		     region->pitch, pbo->buffer, 0, region->tiled,
+		     region->pitch, region->buffer, 0, region->tiling,
+		     region->pitch, pbo->buffer, 0, region->tiling,
 		     0, 0, 0, 0,
 		     region->pitch, region->height,
 		     GL_COPY);

-   intel_batchbuffer_flush(intel->batch);
-
-   if (was_locked)
+   if (!was_locked)
      UNLOCK_HARDWARE(intel);
 }

@ -424,6 +459,7 @@ intel_recreate_static(struct intel_context *intel,
 		      GLuint mem_type)
 {
   intelScreenPrivate *intelScreen = intel->intelScreen;
+   int ret;

   if (region == NULL) {
      region = calloc(sizeof(*region), 1);
@ -436,21 +472,45 @@ intel_recreate_static(struct intel_context *intel,
      region->cpp = intel->ctx.Visual.rgbBits / 8;
   region->pitch = intelScreen->pitch;
   region->height = intelScreen->height;     /* needed? */
-   region->tiled = region_desc->tiled;

   if (intel->ttm) {
      assert(region_desc->bo_handle != -1);
-      region->buffer = intel_ttm_bo_create_from_handle(intel->bufmgr,
-						       name,
-						       region_desc->bo_handle);
+      region->buffer = intel_bo_gem_create_from_name(intel->bufmgr,
+						     name,
+						     region_desc->bo_handle);
+
+      intel_set_region_tiling_gem(intel, region, region_desc->bo_handle);
   } else {
-      region->buffer = dri_bo_alloc_static(intel->bufmgr,
-					   name,
-					   region_desc->offset,
-					   intelScreen->pitch *
-					   intelScreen->height,
-					   region_desc->map,
-					   DRM_BO_FLAG_MEM_TT);
+      ret = drmMap(intel->driFd, region_desc->handle,
+		   region->pitch * region->cpp * region->height,
+		   &region->classic_map);
+      if (ret != 0) {
+	 fprintf(stderr, "Failed to drmMap %s buffer\n", name);
+	 free(region);
+	 return NULL;
+      }
+
+      region->buffer = intel_bo_fake_alloc_static(intel->bufmgr,
+						  name,
+						  region_desc->offset,
+						  region->pitch * region->cpp *
+						  region->height,
+						  region->classic_map);
+
+      /* The sarea just gives us a boolean for whether it's tiled or not,
+       * instead of which tiling mode it is.  Guess.
+       */
+      if (region_desc->tiled) {
+	 if (IS_965(intel->intelScreen->deviceID) &&
+	     region_desc == &intelScreen->depth)
+	    region->tiling = I915_TILING_Y;
+	 else
+	    region->tiling = I915_TILING_X;
+      } else {
+	 region->tiling = I915_TILING_NONE;
+      }
+
+      region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE;
   }

   assert(region->buffer != NULL);
--- a/src/mesa/drivers/dri/intel/intel_regions.h
+++ b/src/mesa/drivers/dri/intel/intel_regions.h
@ -28,6 +28,12 @@
 #ifndef INTEL_REGIONS_H
 #define INTEL_REGIONS_H

+/** @file intel_regions.h
+ *
+ * Structure definitions and prototypes for intel_region handling, which is
+ * the basic structure for rectangular collections of pixels stored in a dri_bo.
+ */
+
 #include "mtypes.h"
 #include "dri_bufmgr.h"

@ -53,8 +59,9 @@ struct intel_region
   GLuint map_refcount;  /**< Reference count for mapping */

   GLuint draw_offset; /**< Offset of drawing address within the region */
-   GLboolean tiled; /**< True if the region is X or Y-tiled.  Used on 965. */
-
+   uint32_t tiling; /**< Which tiling mode the region is in */
+   uint32_t bit_6_swizzle; /**< GEM flag for address swizzling requirement */
+   drmAddress classic_map; /**< drmMap of the region when not in GEM mode */
   struct intel_buffer_object *pbo;     /* zero-copy uploads */
 };

@ -69,7 +76,7 @@ struct intel_region *intel_region_alloc(struct intel_context *intel,
 struct intel_region *
 intel_region_alloc_for_handle(struct intel_context *intel,
 			      GLuint cpp, GLuint pitch, GLuint height,
-			      GLuint tiled, unsigned int handle);
+			      unsigned int handle);

 void intel_region_reference(struct intel_region **dst,
                            struct intel_region *src);
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@ -49,7 +49,7 @@
 #include "i830_dri.h"
 #include "intel_regions.h"
 #include "intel_batchbuffer.h"
-#include "intel_bufmgr_ttm.h"
+#include "intel_bufmgr.h"

 PUBLIC const char __driConfigOptions[] =
   DRI_CONF_BEGIN
@ -59,7 +59,7 @@ PUBLIC const char __driConfigOptions[] =
      /* Options correspond to DRI_CONF_BO_REUSE_DISABLED,
       * DRI_CONF_BO_REUSE_ALL
       */
-      DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 0, "0:1")
+      DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 1, "0:1")
 	 DRI_CONF_DESC_BEGIN(en, "Buffer object reuse")
 	    DRI_CONF_ENUM(0, "Disable buffer object reuse")
 	    DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
@ -90,51 +90,6 @@ intelMapScreenRegions(__DRIscreenPrivate * sPriv)
 {
   intelScreenPrivate *intelScreen = (intelScreenPrivate *) sPriv->private;

-   if (intelScreen->front.handle) {
-      if (drmMap(sPriv->fd,
-                 intelScreen->front.handle,
-                 intelScreen->front.size,
-                 (drmAddress *) & intelScreen->front.map) != 0) {
-         _mesa_problem(NULL, "drmMap(frontbuffer) failed!");
-         return GL_FALSE;
-      }
-   }
-   else {
-      _mesa_warning(NULL, "no front buffer handle in intelMapScreenRegions!");
-   }
-
-   if (0)
-      _mesa_printf("Back 0x%08x ", intelScreen->back.handle);
-   if (drmMap(sPriv->fd,
-              intelScreen->back.handle,
-              intelScreen->back.size,
-              (drmAddress *) & intelScreen->back.map) != 0) {
-      intelUnmapScreenRegions(intelScreen);
-      return GL_FALSE;
-   }
-
-   if (intelScreen->third.handle) {
-      if (0)
-	 _mesa_printf("Third 0x%08x ", intelScreen->third.handle);
-      if (drmMap(sPriv->fd,
-		 intelScreen->third.handle,
-		 intelScreen->third.size,
-		 (drmAddress *) & intelScreen->third.map) != 0) {
-	 intelUnmapScreenRegions(intelScreen);
-	 return GL_FALSE;
-      }
-   }
-
-   if (0)
-      _mesa_printf("Depth 0x%08x ", intelScreen->depth.handle);
-   if (drmMap(sPriv->fd,
-              intelScreen->depth.handle,
-              intelScreen->depth.size,
-              (drmAddress *) & intelScreen->depth.map) != 0) {
-      intelUnmapScreenRegions(intelScreen);
-      return GL_FALSE;
-   }
-
   if (0)
      _mesa_printf("TEX 0x%08x ", intelScreen->tex.handle);
   if (intelScreen->tex.size != 0) {
@ -147,50 +102,15 @@ intelMapScreenRegions(__DRIscreenPrivate * sPriv)
      }
   }

-   if (0)
-      printf("Mappings:  front: %p  back: %p  third: %p  depth: %p  tex: %p\n",
-             intelScreen->front.map,
-             intelScreen->back.map, intelScreen->third.map,
-             intelScreen->depth.map, intelScreen->tex.map);
   return GL_TRUE;
 }

 void
 intelUnmapScreenRegions(intelScreenPrivate * intelScreen)
 {
-#define REALLY_UNMAP 1
-   if (intelScreen->front.map) {
-#if REALLY_UNMAP
-      if (drmUnmap(intelScreen->front.map, intelScreen->front.size) != 0)
-         printf("drmUnmap front failed!\n");
-#endif
-      intelScreen->front.map = NULL;
-   }
-   if (intelScreen->back.map) {
-#if REALLY_UNMAP
-      if (drmUnmap(intelScreen->back.map, intelScreen->back.size) != 0)
-         printf("drmUnmap back failed!\n");
-#endif
-      intelScreen->back.map = NULL;
-   }
-   if (intelScreen->third.map) {
-#if REALLY_UNMAP
-      if (drmUnmap(intelScreen->third.map, intelScreen->third.size) != 0)
-         printf("drmUnmap third failed!\n");
-#endif
-      intelScreen->third.map = NULL;
-   }
-   if (intelScreen->depth.map) {
-#if REALLY_UNMAP
-      drmUnmap(intelScreen->depth.map, intelScreen->depth.size);
-      intelScreen->depth.map = NULL;
-#endif
-   }
   if (intelScreen->tex.map) {
-#if REALLY_UNMAP
      drmUnmap(intelScreen->tex.map, intelScreen->tex.size);
      intelScreen->tex.map = NULL;
-#endif
   }
 }

@ -221,16 +141,16 @@ intelPrintSAREA(const struct drm_i915_sarea * sarea)
           sarea->height);
   fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch);
   fprintf(stderr,
-           "SAREA: front offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           "SAREA: front offset: 0x%08x  size: 0x%x  handle: 0x%x tiled: %d\n",
           sarea->front_offset, sarea->front_size,
-           (unsigned) sarea->front_handle);
+           (unsigned) sarea->front_handle, sarea->front_tiled);
   fprintf(stderr,
-           "SAREA: back  offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           "SAREA: back  offset: 0x%08x  size: 0x%x  handle: 0x%x tiled: %d\n",
           sarea->back_offset, sarea->back_size,
-           (unsigned) sarea->back_handle);
-   fprintf(stderr, "SAREA: depth offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
+           (unsigned) sarea->back_handle, sarea->back_tiled);
+   fprintf(stderr, "SAREA: depth offset: 0x%08x  size: 0x%x  handle: 0x%x tiled: %d\n",
           sarea->depth_offset, sarea->depth_size,
-           (unsigned) sarea->depth_handle);
+           (unsigned) sarea->depth_handle, sarea->depth_tiled);
   fprintf(stderr, "SAREA: tex   offset: 0x%08x  size: 0x%x  handle: 0x%x\n",
           sarea->tex_offset, sarea->tex_size, (unsigned) sarea->tex_handle);
 }
@ -334,8 +254,6 @@ intelHandleDrawableConfig(__DRIdrawablePrivate *dPriv,
    * attached. */
 }

-#define BUFFER_FLAG_TILED 0x0100
-
 /**
 * DRI2 entrypoint
 */
@ -348,7 +266,6 @@ intelHandleBufferAttach(__DRIdrawablePrivate *dPriv,
   struct intel_renderbuffer *rb;
   struct intel_region *region;
   struct intel_context *intel = pcp->driverPrivate;
-   GLuint tiled;

   switch (ba->buffer.attachment) {
   case DRI_DRAWABLE_BUFFER_FRONT_LEFT:
@ -382,10 +299,9 @@ intelHandleBufferAttach(__DRIdrawablePrivate *dPriv,
      return;
 #endif

-   tiled = (ba->buffer.flags & BUFFER_FLAG_TILED) > 0;
   region = intel_region_alloc_for_handle(intel, ba->buffer.cpp,
 					  ba->buffer.pitch / ba->buffer.cpp,
-					  dPriv->h, tiled,
+					  dPriv->h,
 					  ba->buffer.handle);

   intel_renderbuffer_set_region(rb, region);
@ -530,14 +446,13 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
      _mesa_initialize_framebuffer(&intel_fb->Base, mesaVis);

      /* setup the hardware-based renderbuffers */
-      {
-         intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat);
-         _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT,
-				&intel_fb->color_rb[0]->Base);
-      }
+      intel_fb->color_rb[0] = intel_create_renderbuffer(rgbFormat);
+      _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_FRONT_LEFT,
+			     &intel_fb->color_rb[0]->Base);

      if (mesaVis->doubleBufferMode) {
-         intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat);
+	 intel_fb->color_rb[1] = intel_create_renderbuffer(rgbFormat);
+
         _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_BACK_LEFT,
 				&intel_fb->color_rb[1]->Base);

@ -569,7 +484,7 @@ intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
      else if (mesaVis->depthBits == 16) {
         /* just 16-bit depth buffer, no hw stencil */
         struct intel_renderbuffer *depthRb
-            = intel_create_renderbuffer(GL_DEPTH_COMPONENT16);
+	    = intel_create_renderbuffer(GL_DEPTH_COMPONENT16);
         _mesa_add_renderbuffer(&intel_fb->Base, BUFFER_DEPTH, &depthRb->Base);
      }

--- a/src/mesa/drivers/dri/intel/intel_screen.h
+++ b/src/mesa/drivers/dri/intel/intel_screen.h
@ -74,6 +74,8 @@ typedef struct
   int irq_active;
   int allow_batchbuffer;

+   int ttm;
+
   /**
   * Configuration cache with default values for all contexts
   */
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@ -39,6 +39,224 @@

 #include "swrast/swrast.h"

+static void
+intel_set_span_functions(struct intel_context *intel,
+			 struct gl_renderbuffer *rb);
+
+#define SPAN_CACHE_SIZE		4096
+
+static void
+get_span_cache(struct intel_renderbuffer *irb, uint32_t offset)
+{
+   if (irb->span_cache == NULL) {
+      irb->span_cache = _mesa_malloc(SPAN_CACHE_SIZE); /* NOTE(review): allocation result is not checked */
+      irb->span_cache_offset = -1; /* no window cached yet */
+   }
+   /* Refill when offset lies outside the cached SPAN_CACHE_SIZE-aligned window. */
+   if ((offset & ~(SPAN_CACHE_SIZE - 1)) != irb->span_cache_offset) {
+      irb->span_cache_offset = offset & ~(SPAN_CACHE_SIZE - 1);
+      dri_bo_get_subdata(irb->region->buffer, irb->span_cache_offset,
+			 SPAN_CACHE_SIZE, irb->span_cache); /* NOTE(review): always fetches SPAN_CACHE_SIZE bytes; confirm the buffer extends that far */
+   }
+}
+
+static void
+clear_span_cache(struct intel_renderbuffer *irb)
+{
+   irb->span_cache_offset = -1; /* same "nothing cached" value get_span_cache() starts with, so the next read refetches */
+}
+
+static uint32_t
+pread_32(struct intel_renderbuffer *irb, uint32_t offset)
+{
+   get_span_cache(irb, offset);
+   /* Return the 32-bit value at offset's position inside the cached window. */
+   return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1)));
+}
+
+static uint32_t
+pread_xrgb8888(struct intel_renderbuffer *irb, uint32_t offset)
+{
+   get_span_cache(irb, offset);
+   /* Same as a 32-bit read, but the top byte is forced to 0xff. */
+   return *(uint32_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1))) |
+      0xff000000;
+}
+
+static uint16_t
+pread_16(struct intel_renderbuffer *irb, uint32_t offset)
+{
+   get_span_cache(irb, offset);
+   /* Return the 16-bit value at offset's position inside the cached window. */
+   return *(uint16_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1)));
+}
+
+static uint8_t
+pread_8(struct intel_renderbuffer *irb, uint32_t offset)
+{
+   get_span_cache(irb, offset);
+   /* Return the byte at offset's position inside the cached window. */
+   return *(uint8_t *)(irb->span_cache + (offset & (SPAN_CACHE_SIZE - 1)));
+}
+
+static void
+pwrite_32(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val)
+{
+   clear_span_cache(irb);
+   /* Cache was reset above; now store the 4 bytes of val at offset in the region's buffer. */
+   dri_bo_subdata(irb->region->buffer, offset, 4, &val);
+}
+
+static void
+pwrite_xrgb8888(struct intel_renderbuffer *irb, uint32_t offset, uint32_t val)
+{
+   clear_span_cache(irb);
+   /* Only the first 3 bytes of val are stored. NOTE(review): presumably relies on little-endian order to skip the X byte - confirm. */
+   dri_bo_subdata(irb->region->buffer, offset, 3, &val);
+}
+
+static void
+pwrite_16(struct intel_renderbuffer *irb, uint32_t offset, uint16_t val)
+{
+   clear_span_cache(irb);
+   /* Cache was reset above; now store the 2 bytes of val at offset in the region's buffer. */
+   dri_bo_subdata(irb->region->buffer, offset, 2, &val);
+}
+
+static void
+pwrite_8(struct intel_renderbuffer *irb, uint32_t offset, uint8_t val)
+{
+   clear_span_cache(irb);
+   /* Cache was reset above; now store the single byte val at offset in the region's buffer. */
+   dri_bo_subdata(irb->region->buffer, offset, 1, &val);
+}
+
+static uint32_t no_tile_swizzle(struct intel_renderbuffer *irb,
+				struct intel_context *intel,
+				int x, int y)
+{
+	x += intel->drawX;
+	y += intel->drawY;
+	/* Linear layout: byte offset = (row * region pitch + column) * bytes per pixel. */
+	return (y * irb->region->pitch + x) * irb->region->cpp;
+}
+
+/*
+ * Deal with tiled surfaces
+ */
+
+static uint32_t x_tile_swizzle(struct intel_renderbuffer *irb,
+			       struct intel_context *intel,
+			       int x, int y)
+{
+	int	tile_stride;
+	int	xbyte;
+	int	x_tile_off, y_tile_off;
+	int	x_tile_number, y_tile_number;
+	int	tile_off, tile_base;
+	/* Maps pixel (x, y) to a byte offset, treating tiles as 512 bytes wide by 8 rows. */
+	tile_stride = (irb->pfPitch * irb->region->cpp) << 3;
+	/* tile_stride: bytes covered by one row of tiles (row pitch in bytes times 8 rows). */
+	x += intel->drawX;
+	y += intel->drawY;
+
+	xbyte = x * irb->region->cpp;
+	/* Split the position into an offset inside the tile and the tile's index. */
+	x_tile_off = xbyte & 0x1ff;
+	y_tile_off = y & 7;
+
+	x_tile_number = xbyte >> 9;
+	y_tile_number = y >> 3;
+	/* Offset within the tile: each tile row is 512 (1 << 9) bytes. */
+	tile_off = (y_tile_off << 9) + x_tile_off;
+	/* XOR bits 9/10/11 of the in-tile offset into bit 6 as the swizzle mode requires. */
+	switch (irb->region->bit_6_swizzle) {
+	case I915_BIT_6_SWIZZLE_NONE:
+	   break;
+	case I915_BIT_6_SWIZZLE_9:
+	   tile_off ^= ((tile_off >> 3) & 64);
+	   break;
+	case I915_BIT_6_SWIZZLE_9_10:
+	   tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64);
+	   break;
+	case I915_BIT_6_SWIZZLE_9_11:
+	   tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 5) & 64);
+	   break;
+	case I915_BIT_6_SWIZZLE_9_10_11:
+	   tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^
+	      ((tile_off >> 5) & 64);
+	   break;
+	default:
+	   fprintf(stderr, "Unknown tile swizzling mode %u\n",
+		   (unsigned) irb->region->bit_6_swizzle);
+	   exit(1);
+	}
+	/* Each tile occupies 4096 (1 << 12) bytes; rows of tiles are tile_stride apart. */
+	tile_base = (x_tile_number << 12) + y_tile_number * tile_stride;
+
+#if 0
+	printf("(%d,%d) -> %d + %d = %d (pitch = %d, tstride = %d)\n",
+	       x, y, tile_off, tile_base,
+	       tile_off + tile_base,
+	       irb->pfPitch, tile_stride);
+#endif
+
+	return tile_base + tile_off;
+}
+
+static uint32_t y_tile_swizzle(struct intel_renderbuffer *irb,
+			       struct intel_context *intel,
+			       int x, int y)
+{
+	int	tile_stride;
+	int	xbyte;
+	int	x_tile_off, y_tile_off;
+	int	x_tile_number, y_tile_number;
+	int	tile_off, tile_base;
+	/* Maps pixel (x, y) to a byte offset, treating tiles as 128 bytes wide by 32 rows. */
+	tile_stride = (irb->pfPitch * irb->region->cpp) << 5;
+	/* tile_stride: bytes covered by one row of tiles (row pitch in bytes times 32 rows). */
+	x += intel->drawX;
+	y += intel->drawY;
+
+	xbyte = x * irb->region->cpp;
+	/* Split the position into an offset inside the tile and the tile's index. */
+	x_tile_off = xbyte & 0x7f;
+	y_tile_off = y & 0x1f;
+
+	x_tile_number = xbyte >> 7;
+	y_tile_number = y >> 5;
+	/* In-tile layout: 16-byte-wide columns of 32 rows (512 bytes per column). */
+	tile_off = ((x_tile_off & ~0xf) << 5) + (y_tile_off << 4) +
+	   (x_tile_off & 0xf);
+	/* XOR bits 9/10/11 of the in-tile offset into bit 6 as the swizzle mode requires. */
+	switch (irb->region->bit_6_swizzle) {
+	case I915_BIT_6_SWIZZLE_NONE:
+	   break;
+	case I915_BIT_6_SWIZZLE_9:
+	   tile_off ^= ((tile_off >> 3) & 64);
+	   break;
+	case I915_BIT_6_SWIZZLE_9_10:
+	   tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64);
+	   break;
+	case I915_BIT_6_SWIZZLE_9_11:
+	   tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 5) & 64);
+	   break;
+	case I915_BIT_6_SWIZZLE_9_10_11:
+	   tile_off ^= ((tile_off >> 3) & 64) ^ ((tile_off >> 4) & 64) ^
+	      ((tile_off >> 5) & 64);
+	   break;
+	default:
+	   fprintf(stderr, "Unknown tile swizzling mode %u\n",
+		   (unsigned) irb->region->bit_6_swizzle);
+	   exit(1);
+	}
+	/* Each tile occupies 4096 (1 << 12) bytes; rows of tiles are tile_stride apart. */
+	tile_base = (x_tile_number << 12) + y_tile_number * tile_stride;
+
+	return tile_base + tile_off;
+}
+
 /*
  break intelWriteRGBASpan_ARGB8888
 */
@ -51,10 +269,7 @@
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);		\
   const GLint yScale = irb->RenderToTexture ? 1 : -1;			\
   const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1;	\
-   GLubyte *buf = (GLubyte *) irb->pfMap				\
-      + (intel->drawY * irb->pfPitch + intel->drawX) * irb->region->cpp;\
   GLuint p;								\
-   assert(irb->pfMap);\
   (void) p;

 /* XXX FBO: this is identical to the macro in spantmp2.h except we get
@ -69,12 +284,14 @@
 	 int miny = intel->pClipRects[_nc].y1 - intel->drawY;		\
 	 int maxx = intel->pClipRects[_nc].x2 - intel->drawX;		\
 	 int maxy = intel->pClipRects[_nc].y2 - intel->drawY;
-
-
-
+	
+#if 0
+      }}
+#endif

 #define Y_FLIP(_y) ((_y) * yScale + yBias)

+/* XXX with GEM, these need to tell the kernel */
 #define HW_LOCK()

 #define HW_UNLOCK()
@ -86,7 +303,8 @@

 #define TAG(x)    intel##x##_RGB565
 #define TAG2(x,y) intel##x##_RGB565##y
-#define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 2)
+#define GET_VALUE(X, Y) pread_16(irb, no_tile_swizzle(irb, intel, X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_16(irb, no_tile_swizzle(irb, intel, X, Y), V)
 #include "spantmp2.h"

 /* 32 bit, ARGB8888 color spanline and pixel functions
@ -96,17 +314,89 @@

 #define TAG(x)    intel##x##_ARGB8888
 #define TAG2(x,y) intel##x##_ARGB8888##y
-#define GET_PTR(X,Y) (buf + ((Y) * irb->pfPitch + (X)) * 4)
+#define GET_VALUE(X, Y) pread_32(irb, no_tile_swizzle(irb, intel, X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_32(irb, no_tile_swizzle(irb, intel, X, Y), V)
+#include "spantmp2.h"
+
+/* 32 bit, xRGB8888 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    intel##x##_xRGB8888
+#define TAG2(x,y) intel##x##_xRGB8888##y
+#define GET_VALUE(X, Y) pread_xrgb8888(irb, no_tile_swizzle(irb, intel, X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, no_tile_swizzle(irb, intel, X, Y), V)
+#include "spantmp2.h"
+
+/* 16 bit RGB565 color tile spanline and pixel functions
+ */
+
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x)    intel_XTile_##x##_RGB565
+#define TAG2(x,y) intel_XTile_##x##_RGB565##y
+#define GET_VALUE(X, Y) pread_16(irb, x_tile_swizzle(irb, intel, X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_16(irb, x_tile_swizzle(irb, intel, X, Y), V)
+#include "spantmp2.h"
+
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x)    intel_YTile_##x##_RGB565
+#define TAG2(x,y) intel_YTile_##x##_RGB565##y
+#define GET_VALUE(X, Y) pread_16(irb, y_tile_swizzle(irb, intel, X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_16(irb, y_tile_swizzle(irb, intel, X, Y), V)
+#include "spantmp2.h"
+
+/* 32 bit ARGB8888 color tile spanline and pixel functions
+ */
+
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    intel_XTile_##x##_ARGB8888
+#define TAG2(x,y) intel_XTile_##x##_ARGB8888##y
+#define GET_VALUE(X, Y) pread_32(irb, x_tile_swizzle(irb, intel, X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_32(irb, x_tile_swizzle(irb, intel, X, Y), V)
+#include "spantmp2.h"
+
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    intel_YTile_##x##_ARGB8888
+#define TAG2(x,y) intel_YTile_##x##_ARGB8888##y
+#define GET_VALUE(X, Y) pread_32(irb, y_tile_swizzle(irb, intel, X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_32(irb, y_tile_swizzle(irb, intel, X, Y), V)
+#include "spantmp2.h"
+
+/* 32 bit xRGB8888 color tile spanline and pixel functions
+ */
+
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    intel_XTile_##x##_xRGB8888
+#define TAG2(x,y) intel_XTile_##x##_xRGB8888##y
+#define GET_VALUE(X, Y) pread_xrgb8888(irb, x_tile_swizzle(irb, intel, X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, x_tile_swizzle(irb, intel, X, Y), V)
+#include "spantmp2.h"
+
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    intel_YTile_##x##_xRGB8888
+#define TAG2(x,y) intel_YTile_##x##_xRGB8888##y
+#define GET_VALUE(X, Y) pread_xrgb8888(irb, y_tile_swizzle(irb, intel, X, Y))
+#define PUT_VALUE(X, Y, V) pwrite_xrgb8888(irb, y_tile_swizzle(irb, intel, X, Y), V)
 #include "spantmp2.h"

 #define LOCAL_DEPTH_VARS						\
   struct intel_context *intel = intel_context(ctx);			\
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);		\
-   const GLuint pitch = irb->pfPitch/***XXX region->pitch*/; /* in pixels */ \
   const GLint yScale = irb->RenderToTexture ? 1 : -1;			\
-   const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1;	\
-   char *buf = (char *) irb->pfMap/*XXX use region->map*/ +             \
-      (intel->drawY * pitch + intel->drawX) * irb->region->cpp;
+   const GLint yBias = irb->RenderToTexture ? 0 : irb->Base.Height - 1;


 #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
@ -115,18 +405,37 @@
 ** 16-bit depthbuffer functions.
 **/
 #define VALUE_TYPE GLushort
-
-#define WRITE_DEPTH( _x, _y, d ) \
-   ((GLushort *)buf)[(_x) + (_y) * pitch] = d;
-
-#define READ_DEPTH( d, _x, _y )	\
-   d = ((GLushort *)buf)[(_x) + (_y) * pitch];
-
-
+#define WRITE_DEPTH(_x, _y, d) \
+   pwrite_16(irb, no_tile_swizzle(irb, intel, _x, _y), d)
+#define READ_DEPTH(d, _x, _y) \
+   d = pread_16(irb, no_tile_swizzle(irb, intel, _x, _y))
 #define TAG(x) intel##x##_z16
 #include "depthtmp.h"


+/**
+ ** 16-bit x tile depthbuffer functions.
+ **/
+#define VALUE_TYPE GLushort
+#define WRITE_DEPTH(_x, _y, d) \
+   pwrite_16(irb, x_tile_swizzle(irb, intel, _x, _y), d)
+#define READ_DEPTH(d, _x, _y) \
+   d = pread_16(irb, x_tile_swizzle(irb, intel, _x, _y))
+#define TAG(x) intel_XTile_##x##_z16
+#include "depthtmp.h"
+
+/**
+ ** 16-bit y tile depthbuffer functions.
+ **/
+#define VALUE_TYPE GLushort
+#define WRITE_DEPTH(_x, _y, d) \
+   pwrite_16(irb, y_tile_swizzle(irb, intel, _x, _y), d)
+#define READ_DEPTH(d, _x, _y) \
+   d = pread_16(irb, y_tile_swizzle(irb, intel, _x, _y))
+#define TAG(x) intel_YTile_##x##_z16
+#include "depthtmp.h"
+
+
 /**
 ** 24/8-bit interleaved depth/stencil functions
 ** Note: we're actually reading back combined depth+stencil values.
@ -136,14 +445,13 @@
 #define VALUE_TYPE GLuint

 /* Change ZZZS -> SZZZ */
-#define WRITE_DEPTH( _x, _y, d ) {				\
-   GLuint tmp = ((d) >> 8) | ((d) << 24);			\
-   ((GLuint *)buf)[(_x) + (_y) * pitch] = tmp;			\
-}
+#define WRITE_DEPTH(_x, _y, d)					\
+   pwrite_32(irb, no_tile_swizzle(irb, intel, _x, _y),		\
+	     ((d) >> 8) | ((d) << 24))

 /* Change SZZZ -> ZZZS */
 #define READ_DEPTH( d, _x, _y ) {				\
-   GLuint tmp = ((GLuint *)buf)[(_x) + (_y) * pitch];		\
+   GLuint tmp = pread_32(irb, no_tile_swizzle(irb, intel, _x, _y));	\
   d = (tmp << 8) | (tmp >> 24);				\
 }

@ -152,22 +460,114 @@


 /**
- ** 8-bit stencil function (XXX FBO: This is obsolete)
+ ** 24/8-bit x-tile interleaved depth/stencil functions
+ ** Note: we're actually reading back combined depth+stencil values.
+ ** The wrappers in main/depthstencil.c are used to extract the depth
+ ** and stencil values.
 **/
-#define WRITE_STENCIL( _x, _y, d ) {				\
-   GLuint tmp = ((GLuint *)buf)[(_x) + (_y) * pitch];		\
-   tmp &= 0xffffff;						\
-   tmp |= ((d) << 24);						\
-   ((GLuint *) buf)[(_x) + (_y) * pitch] = tmp;			\
+#define VALUE_TYPE GLuint
+
+/* Change ZZZS -> SZZZ (rotate the 32-bit value right by 8 before writing) */
+#define WRITE_DEPTH(_x, _y, d)					\
+   pwrite_32(irb, x_tile_swizzle(irb, intel, _x, _y),		\
+	     ((d) >> 8) | ((d) << 24))
+
+/* Change SZZZ -> ZZZS */
+#define READ_DEPTH( d, _x, _y ) {				\
+   GLuint tmp = pread_32(irb, x_tile_swizzle(irb, intel, _x, _y));	\
+   d = (tmp << 8) | (tmp >> 24);				\
 }

-#define READ_STENCIL( d, _x, _y )				\
-   d = ((GLuint *)buf)[(_x) + (_y) * pitch] >> 24;
+#define TAG(x) intel_XTile_##x##_z24_s8
+#include "depthtmp.h"
+
+/**
+ ** 24/8-bit y-tile interleaved depth/stencil functions
+ ** Note: we're actually reading back combined depth+stencil values.
+ ** The wrappers in main/depthstencil.c are used to extract the depth
+ ** and stencil values.
+ **/
+#define VALUE_TYPE GLuint
+
+/* Change ZZZS -> SZZZ */
+#define WRITE_DEPTH(_x, _y, d)					\
+   pwrite_32(irb, y_tile_swizzle(irb, intel, _x, _y),		\
+	     ((d) >> 8) | ((d) << 24))
+
+/* Change SZZZ -> ZZZS */
+#define READ_DEPTH( d, _x, _y ) {				\
+   GLuint tmp = pread_32(irb, y_tile_swizzle(irb, intel, _x, _y));	\
+   d = (tmp << 8) | (tmp >> 24);				\
+}
+
+#define TAG(x) intel_YTile_##x##_z24_s8
+#include "depthtmp.h"
+
+
+/**
+ ** 8-bit stencil function (XXX FBO: This is obsolete)
+ **/
+#define WRITE_STENCIL(_x, _y, d)				\
+   pwrite_8(irb, no_tile_swizzle(irb, intel, _x, _y) + 3, d)
+
+/* Read the stencil value: the single byte at offset 3 within the pixel.
+ * No trailing ';' in the macro -- the y-tile READ_STENCIL is already
+ * written without one, so the expanding template has to supply it. */
+#define READ_STENCIL(d, _x, _y)					\
+   d = pread_8(irb, no_tile_swizzle(irb, intel, _x, _y) + 3)

 #define TAG(x) intel##x##_z24_s8
 #include "stenciltmp.h"

+/**
+ ** 8-bit x-tile stencil function (XXX FBO: This is obsolete)
+ **/
+#define WRITE_STENCIL(_x, _y, d)				\
+   pwrite_8(irb, x_tile_swizzle(irb, intel, _x, _y) + 3, d)

+/* Read the stencil value: the single byte at offset 3 within the x-tiled
+ * pixel.  No trailing ';' in the macro -- the y-tile READ_STENCIL is
+ * already written without one, so the expanding template supplies it. */
+#define READ_STENCIL(d, _x, _y)					\
+   d = pread_8(irb, x_tile_swizzle(irb, intel, _x, _y) + 3)
+
+#define TAG(x) intel_XTile_##x##_z24_s8
+#include "stenciltmp.h"
+
+/**
+ ** 8-bit y-tile stencil function (XXX FBO: This is obsolete)
+ **/
+#define WRITE_STENCIL(_x, _y, d)				\
+   pwrite_8(irb, y_tile_swizzle(irb, intel, _x, _y) + 3, d)
+
+#define READ_STENCIL(d, _x, _y)					\
+   d = pread_8(irb, y_tile_swizzle(irb, intel, _x, _y) + 3)
+
+#define TAG(x) intel_YTile_##x##_z24_s8
+#include "stenciltmp.h"
+
+/**
+ * Prepare a renderbuffer for software span access.
+ *
+ * Nothing happens when intel_renderbuffer() yields NULL for \p rb or the
+ * renderbuffer has no region.  Otherwise the region pitch is copied into
+ * pfPitch and the span functions for \p rb are installed.
+ */
+void
+intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb)
+{
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+
+   if (irb != NULL && irb->region != NULL) {
+      irb->pfPitch = irb->region->pitch;
+      intel_set_span_functions(intel, rb);
+   }
+}
+
+/**
+ * Tear down software span access for a renderbuffer.
+ *
+ * Nothing happens when intel_renderbuffer() yields NULL for \p rb or the
+ * renderbuffer has no region.  Otherwise the span cache is cleared,
+ * pfPitch is reset to 0 and the GetRow/PutRow hooks of \p rb are set to
+ * NULL.
+ */
+void
+intel_renderbuffer_unmap(struct intel_context *intel,
+			 struct gl_renderbuffer *rb)
+{
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+
+   if (irb != NULL && irb->region != NULL) {
+      clear_span_cache(irb);
+      irb->pfPitch = 0;
+
+      /* Drop the row accessors that were installed for span access. */
+      rb->GetRow = NULL;
+      rb->PutRow = NULL;
+   }
+}

 /**
 * Map or unmap all the renderbuffers which we may need during
@ -186,23 +586,13 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
 {
   GLcontext *ctx = &intel->ctx;
   GLuint i, j;
-   struct intel_renderbuffer *irb;

   /* color draw buffers */
   for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) {
-      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[j];
-      irb = intel_renderbuffer(rb);
-      if (irb) {
-         /* this is a user-created intel_renderbuffer */
-         if (irb->region) {
-            if (map)
-               intel_region_map(intel, irb->region);
-            else
-               intel_region_unmap(intel, irb->region);
-            irb->pfMap = irb->region->map;
-            irb->pfPitch = irb->region->pitch;
-         }
-      }
+      if (map)
+	 intel_renderbuffer_map(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]);
+      else
+	 intel_renderbuffer_unmap(intel, ctx->DrawBuffer->_ColorDrawBuffers[j]);
   }

   /* check for render to textures */
@ -225,77 +615,28 @@ intel_map_unmap_buffers(struct intel_context *intel, GLboolean map)
   }

   /* color read buffers */
-   irb = intel_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
-   if (irb && irb->region) {
-      if (map)
-         intel_region_map(intel, irb->region);
-      else
-         intel_region_unmap(intel, irb->region);
-      irb->pfMap = irb->region->map;
-      irb->pfPitch = irb->region->pitch;
-   }
-
-   /* Account for front/back color page flipping.
-    * The span routines use the pfMap and pfPitch fields which will
-    * swap the front/back region map/pitch if we're page flipped.
-    * Do this after mapping, above, so the map field is valid.
-    */
-#if 0
-   if (map && ctx->DrawBuffer->Name == 0) {
-      struct intel_renderbuffer *irbFront
-         = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_FRONT_LEFT);
-      struct intel_renderbuffer *irbBack
-         = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_BACK_LEFT);
-      if (irbBack) {
-         /* double buffered */
-         if (intel->sarea->pf_current_page == 0) {
-            irbFront->pfMap = irbFront->region->map;
-            irbFront->pfPitch = irbFront->region->pitch;
-            irbBack->pfMap = irbBack->region->map;
-            irbBack->pfPitch = irbBack->region->pitch;
-         }
-         else {
-            irbFront->pfMap = irbBack->region->map;
-            irbFront->pfPitch = irbBack->region->pitch;
-            irbBack->pfMap = irbFront->region->map;
-            irbBack->pfPitch = irbFront->region->pitch;
-         }
-      }
-   }
-#endif
+   if (map)
+      intel_renderbuffer_map(intel, ctx->ReadBuffer->_ColorReadBuffer);
+   else
+      intel_renderbuffer_unmap(intel, ctx->ReadBuffer->_ColorReadBuffer);

   /* depth buffer (Note wrapper!) */
   if (ctx->DrawBuffer->_DepthBuffer) {
-      irb = intel_renderbuffer(ctx->DrawBuffer->_DepthBuffer->Wrapped);
-      if (irb && irb->region) {
-         if (map) {
-            intel_region_map(intel, irb->region);
-            irb->pfMap = irb->region->map;
-            irb->pfPitch = irb->region->pitch;
-         }
-         else {
-            intel_region_unmap(intel, irb->region);
-            irb->pfMap = irb->region->map;
-            irb->pfPitch = irb->region->pitch;
-         }
-      }
+      if (map)
+	 intel_renderbuffer_map(intel, ctx->DrawBuffer->_DepthBuffer->Wrapped);
+      else
+	 intel_renderbuffer_unmap(intel,
+				  ctx->DrawBuffer->_DepthBuffer->Wrapped);
   }

   /* stencil buffer (Note wrapper!) */
   if (ctx->DrawBuffer->_StencilBuffer) {
-      irb = intel_renderbuffer(ctx->DrawBuffer->_StencilBuffer->Wrapped);
-      if (irb && irb->region) {
-         if (map) {
-            intel_region_map(intel, irb->region);
-            irb->pfMap = irb->region->map;
-            irb->pfPitch = irb->region->pitch;
-         }
-         else {
-            intel_region_unmap(intel, irb->region);
-            irb->pfMap = irb->region->map;
-            irb->pfPitch = irb->region->pitch;
-         }
-      }
+      if (map)
+	 intel_renderbuffer_map(intel,
+				ctx->DrawBuffer->_StencilBuffer->Wrapped);
+      else
+	 intel_renderbuffer_unmap(intel,
+				  ctx->DrawBuffer->_StencilBuffer->Wrapped);
   }
 }

@ -313,18 +654,9 @@ intelSpanRenderStart(GLcontext * ctx)
   struct intel_context *intel = intel_context(ctx);
   GLuint i;

-   intelFinish(&intel->ctx);
+   intelFlush(&intel->ctx);
   LOCK_HARDWARE(intel);

-#if 0
-   /* Just map the framebuffer and all textures.  Bufmgr code will
-    * take care of waiting on the necessary fences:
-    */
-   intel_region_map(intel, intel->front_region);
-   intel_region_map(intel, intel->back_region);
-   intel_region_map(intel, intel->depth_region);
-#endif
-
   for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
      if (ctx->Texture.Unit[i]._ReallyEnabled) {
         struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current;
@ -347,14 +679,6 @@ intelSpanRenderFinish(GLcontext * ctx)

   _swrast_flush(ctx);

-   /* Now unmap the framebuffer:
-    */
-#if 0
-   intel_region_unmap(intel, intel->front_region);
-   intel_region_unmap(intel, intel->back_region);
-   intel_region_unmap(intel, intel->depth_region);
-#endif
-
   for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
      if (ctx->Texture.Unit[i]._ReallyEnabled) {
         struct gl_texture_object *texObj = ctx->Texture.Unit[i]._Current;
@ -381,26 +705,108 @@ intelInitSpanFuncs(GLcontext * ctx)
 * Plug in appropriate span read/write functions for the given renderbuffer.
 * These are used for the software fallbacks.
 */
-void
-intel_set_span_functions(struct gl_renderbuffer *rb)
+static void
+intel_set_span_functions(struct intel_context *intel,
+			 struct gl_renderbuffer *rb)
 {
+   struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb;
+   uint32_t tiling;
+
+   /* If in GEM mode, we need to do the tile address swizzling ourselves,
+    * instead of the fence registers handling it.
+    */
+   if (intel->ttm)
+      tiling = irb->region->tiling;
+   else
+      tiling = I915_TILING_NONE;
+
   if (rb->_ActualFormat == GL_RGB5) {
      /* 565 RGB */
-      intelInitPointers_RGB565(rb);
+      switch (tiling) {
+      case I915_TILING_NONE:
+      default:
+	 intelInitPointers_RGB565(rb);
+	 break;
+      case I915_TILING_X:
+	 intel_XTile_InitPointers_RGB565(rb);
+	 break;
+      case I915_TILING_Y:
+	 intel_YTile_InitPointers_RGB565(rb);
+	 break;
+      }
+   }
+   else if (rb->_ActualFormat == GL_RGB8) {
+      /* 8888 RGBx */
+      switch (tiling) {
+      case I915_TILING_NONE:
+      default:
+	 intelInitPointers_xRGB8888(rb);
+	 break;
+      case I915_TILING_X:
+	 intel_XTile_InitPointers_xRGB8888(rb);
+	 break;
+      case I915_TILING_Y:
+	 intel_YTile_InitPointers_xRGB8888(rb);
+	 break;
+      }
   }
   else if (rb->_ActualFormat == GL_RGBA8) {
      /* 8888 RGBA */
-      intelInitPointers_ARGB8888(rb);
+      switch (tiling) {
+      case I915_TILING_NONE:
+      default:
+	 intelInitPointers_ARGB8888(rb);
+	 break;
+      case I915_TILING_X:
+	 intel_XTile_InitPointers_ARGB8888(rb);
+	 break;
+      case I915_TILING_Y:
+	 intel_YTile_InitPointers_ARGB8888(rb);
+	 break;
+      }
   }
   else if (rb->_ActualFormat == GL_DEPTH_COMPONENT16) {
-      intelInitDepthPointers_z16(rb);
+      switch (tiling) {
+      case I915_TILING_NONE:
+      default:
+	 intelInitDepthPointers_z16(rb);
+	 break;
+      case I915_TILING_X:
+	 intel_XTile_InitDepthPointers_z16(rb);
+	 break;
+      case I915_TILING_Y:
+	 intel_YTile_InitDepthPointers_z16(rb);
+	 break;
+      }
   }
   else if (rb->_ActualFormat == GL_DEPTH_COMPONENT24 ||        /* XXX FBO remove */
            rb->_ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
-      intelInitDepthPointers_z24_s8(rb);
+      switch (tiling) {
+      case I915_TILING_NONE:
+      default:
+	 intelInitDepthPointers_z24_s8(rb);
+	 break;
+      case I915_TILING_X:
+	 intel_XTile_InitDepthPointers_z24_s8(rb);
+	 break;
+      case I915_TILING_Y:
+	 intel_YTile_InitDepthPointers_z24_s8(rb);
+	 break;
+      }
   }
-   else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) {       /* XXX FBO remove */
-      intelInitStencilPointers_z24_s8(rb);
+   else if (rb->_ActualFormat == GL_STENCIL_INDEX8_EXT) {
+      switch (tiling) {
+      case I915_TILING_NONE:
+      default:
+	 intelInitStencilPointers_z24_s8(rb);
+	 break;
+      case I915_TILING_X:
+	 intel_XTile_InitStencilPointers_z24_s8(rb);
+	 break;
+      case I915_TILING_Y:
+	 intel_YTile_InitStencilPointers_z24_s8(rb);
+	 break;
+      }
   }
   else {
      _mesa_problem(NULL,
--- a/src/mesa/drivers/dri/intel/intel_span.h
+++ b/src/mesa/drivers/dri/intel/intel_span.h
@ -32,7 +32,9 @@ extern void intelInitSpanFuncs(GLcontext * ctx);

 extern void intelSpanRenderFinish(GLcontext * ctx);
 extern void intelSpanRenderStart(GLcontext * ctx);
-
-extern void intel_set_span_functions(struct gl_renderbuffer *rb);
+/* Set up / tear down software span access for a single renderbuffer. */
+extern void intel_renderbuffer_map(struct intel_context *intel,
+                                   struct gl_renderbuffer *rb);
+extern void intel_renderbuffer_unmap(struct intel_context *intel,
+                                     struct gl_renderbuffer *rb);

 #endif
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@ -144,15 +144,13 @@ do_copy_texsubimage(struct intel_context *intel,
                           -src->pitch,
                           src->buffer,
                           src->height * src->pitch * src->cpp,
-			   GL_FALSE,
+			   src->tiling,
                           intelImage->mt->pitch,
                           intelImage->mt->region->buffer,
                           image_offset,
-			   intelImage->mt->region->tiled,
+			   intelImage->mt->region->tiling,
                           x, y + height, dstx, dsty, width, height,
 			   GL_COPY); /* ? */
-
-         intel_batchbuffer_flush(intel->batch);
      }
   }

--- a/src/mesa/drivers/dri/intel/intel_tex_image.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@ -238,8 +238,6 @@ try_pbo_upload(struct intel_context *intel,
                        dst_stride, dst_buffer, dst_offset, GL_FALSE,
                        0, 0, 0, 0, width, height,
 			GL_COPY);
-
-      intel_batchbuffer_flush(intel->batch);
   }
   UNLOCK_HARDWARE(intel);

@ -400,10 +398,25 @@ intelTexImage(GLcontext * ctx,

      intel_miptree_reference(&intelImage->mt, intelObj->mt);
      assert(intelImage->mt);
-   }
+   } else if (intelImage->base.Border == 0) {
+      int comp_byte = 0;

-   if (!intelImage->mt)
-      DBG("XXX: Image did not fit into tree - storing in local memory!\n");
+      if (intelImage->base.IsCompressed) {
+	 comp_byte =
+	    intel_compressed_num_bytes(intelImage->base.TexFormat->MesaFormat);
+      }
+
+      /* Didn't fit in the object miptree, but it's suitable for inclusion in
+       * a miptree, so create one just for our level and store it in the image.
+       * It'll get moved into the object miptree at validate time.
+       */
+      intelImage->mt = intel_miptree_create(intel, target, internalFormat,
+					    level, level,
+					    width, height, depth,
+					    intelImage->base.TexFormat->TexelBytes,
+					    comp_byte);
+
+   }

   /* PBO fastpaths:
    */
--- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@ -125,13 +125,10 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   int comp_byte = 0;
   int cpp;
-
   GLuint face, i;
   GLuint nr_faces = 0;
   struct intel_texture_image *firstImage;

-   GLboolean need_flush = GL_FALSE;
-
   /* We know/require this is true by now: 
    */
   assert(intelObj->base._Complete);
@ -227,21 +224,10 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
          */
         if (intelObj->mt != intelImage->mt) {
            copy_image_data_to_tree(intel, intelObj, intelImage);
-	    need_flush = GL_TRUE;
         }
      }
   }

-#ifdef I915
-   /* XXX: what is this flush about?
-    * On 965, it causes a batch flush in the middle of the state relocation
-    * emits, which means that the eventual rendering doesn't have all of the
-    * required relocations in place.
-    */
-   if (need_flush)
-      intel_batchbuffer_flush(intel->batch);
-#endif
-
   return GL_TRUE;
 }